1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 // * Redistribution's of source code must retain the above copyright notice,
22 // this list of conditions and the following disclaimer.
23 //
24 // * Redistribution's in binary form must reproduce the above copyright notice,
25 // this list of conditions and the following disclaimer in the documentation
26 // and/or other materials provided with the distribution.
27 //
28 // * The name of the copyright holders may not be used to endorse or promote products
29 // derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43
44 /********************************* COPYRIGHT NOTICE *******************************\
45 The function for RGB to Lab conversion is based on the MATLAB script
46 RGB2Lab.m translated by Mark Ruzon from C code by Yossi Rubner, 23 September 1997.
47 See the page [http://vision.stanford.edu/~ruzon/software/rgblab.html]
48 \**********************************************************************************/
49
50 /********************************* COPYRIGHT NOTICE *******************************\
51 Original code for Bayer->BGR/RGB conversion is provided by Dirk Schaefer
52 from MD-Mathematische Dienste GmbH. Below is the copyright notice:
53
54 IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
55 By downloading, copying, installing or using the software you agree
56 to this license. If you do not agree to this license, do not download,
57 install, copy or use the software.
58
59 Contributors License Agreement:
60
61 Copyright (c) 2002,
62 MD-Mathematische Dienste GmbH
63 Im Defdahl 5-10
64 44141 Dortmund
65 Germany
66 www.md-it.de
67
68 Redistribution and use in source and binary forms,
69 with or without modification, are permitted provided
70 that the following conditions are met:
71
72 Redistributions of source code must retain
73 the above copyright notice, this list of conditions and the following disclaimer.
74 Redistributions in binary form must reproduce the above copyright notice,
75 this list of conditions and the following disclaimer in the documentation
76 and/or other materials provided with the distribution.
77 The name of Contributor may not be used to endorse or promote products
78 derived from this software without specific prior written permission.
79
80 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
81 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
82 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
83 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE
84 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
85 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
86 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
87 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
88 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
89 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
90 THE POSSIBILITY OF SUCH DAMAGE.
91 \**********************************************************************************/
92
93 #include "precomp.hpp"
94 #include "opencl_kernels_imgproc.hpp"
95 #include <limits>
96
// Rounds (x / 2^n) to the nearest integer: adds half the divisor before the shift.
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// Maximum channel values passed to IPP channel-reordering calls as the alpha fill.
#define MAX_IPP8u   255
#define MAX_IPP16u  65535
#define MAX_IPP32f  1.0
// One-time IPP dispatcher initialization at static-init time; the status is kept
// only so the call has a destination (it is not checked anywhere in this file).
static IppStatus sts = ippInit();
#endif
105
106 namespace cv
107 {
108
// Computes natural cubic spline coefficients for the function sampled at
// integer abscissas: (xi = i, yi = f[i]), i = 0..n.
// 'f' must hold n+1 samples; 'tab' receives 4 coefficients per segment
// (4*n entries total), laid out as [y, b, c, d] so the value on segment k is
// y + b*t + c*t^2 + d*t^3 for t in [0, 1].
template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
{
    // Forward elimination of the tridiagonal system for the second-derivative
    // coefficients; tab temporarily stores the sweep multipliers and RHS.
    tab[0] = (_Tp)0;
    tab[1] = (_Tp)0;

    for( int k = 1; k < n-1; k++ )
    {
        _Tp rhs = 3*(f[k+1] - 2*f[k] + f[k-1]);
        _Tp inv = 1/(4 - tab[(k-1)*4]);
        tab[k*4] = inv;
        tab[k*4+1] = (rhs - tab[(k-1)*4+1])*inv;
    }

    // Backward substitution, overwriting the sweep workspace with the final
    // per-segment polynomial coefficients.
    _Tp cNext = 0;
    for( int k = n-1; k >= 0; k-- )
    {
        _Tp c = tab[k*4+1] - tab[k*4]*cNext;
        _Tp b = f[k+1] - f[k] - (cNext + c*2)*(_Tp)0.3333333333333333;
        _Tp d = (cNext - c)*(_Tp)0.3333333333333333;
        tab[k*4] = f[k];
        tab[k*4+1] = b;
        tab[k*4+2] = c;
        tab[k*4+3] = d;
        cNext = c;
    }
}
133
// interpolates value of a function at x, 0 <= x <= n using a cubic spline.
// 'tab' is the 4-coefficients-per-segment table produced by splineBuild.
template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab, int n)
{
    // don't touch this function without urgent need - some versions of gcc fail to inline it correctly
    // Clamp the segment index to [0, n-1] so out-of-range x falls back to the
    // boundary segment's polynomial.
    int ix = std::min(std::max(int(x), 0), n-1);
    x -= ix;                // fractional offset within the chosen segment
    tab += ix*4;            // jump to that segment's 4 coefficients
    // Horner evaluation of the cubic: d*x^3 + c*x^2 + b*x + y.
    return ((tab[3]*x + tab[2])*x + tab[1])*x + tab[0];
}
143
144
145 template<typename _Tp> struct ColorChannel
146 {
147 typedef float worktype_f;
maxcv::ColorChannel148 static _Tp max() { return std::numeric_limits<_Tp>::max(); }
halfcv::ColorChannel149 static _Tp half() { return (_Tp)(max()/2 + 1); }
150 };
151
152 template<> struct ColorChannel<float>
153 {
154 typedef float worktype_f;
maxcv::ColorChannel155 static float max() { return 1.f; }
halfcv::ColorChannel156 static float half() { return 0.5f; }
157 };
158
159 /*template<> struct ColorChannel<double>
160 {
161 typedef double worktype_f;
162 static double max() { return 1.; }
163 static double half() { return 0.5; }
164 };*/
165
166
167 ///////////////////////////// Top-level template function ////////////////////////////////
168
169 template <typename Cvt>
170 class CvtColorLoop_Invoker : public ParallelLoopBody
171 {
172 typedef typename Cvt::channel_type _Tp;
173 public:
174
CvtColorLoop_Invoker(const Mat & _src,Mat & _dst,const Cvt & _cvt)175 CvtColorLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt) :
176 ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt)
177 {
178 }
179
operator ()(const Range & range) const180 virtual void operator()(const Range& range) const
181 {
182 const uchar* yS = src.ptr<uchar>(range.start);
183 uchar* yD = dst.ptr<uchar>(range.start);
184
185 for( int i = range.start; i < range.end; ++i, yS += src.step, yD += dst.step )
186 cvt((const _Tp*)yS, (_Tp*)yD, src.cols);
187 }
188
189 private:
190 const Mat& src;
191 Mat& dst;
192 const Cvt& cvt;
193
194 const CvtColorLoop_Invoker& operator= (const CvtColorLoop_Invoker&);
195 };
196
197 template <typename Cvt>
CvtColorLoop(const Mat & src,Mat & dst,const Cvt & cvt)198 void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt)
199 {
200 parallel_for_(Range(0, src.rows), CvtColorLoop_Invoker<Cvt>(src, dst, cvt), src.total()/(double)(1<<16) );
201 }
202
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)

// Signatures of the IPP primitives dispatched through the tables below:
// reorder = channel-swap with an order array, general = plain ROI transform,
// color2gray = weighted channel sum with caller-supplied coefficients.
typedef IppStatus (CV_STDCALL* ippiReorderFunc)(const void *, int, void *, int, IppiSize, const int *);
typedef IppStatus (CV_STDCALL* ippiGeneralFunc)(const void *, int, void *, int, IppiSize);
typedef IppStatus (CV_STDCALL* ippiColor2GrayFunc)(const void *, int, void *, int, IppiSize, const Ipp32f *);
208
209 template <typename Cvt>
210 class CvtColorIPPLoop_Invoker :
211 public ParallelLoopBody
212 {
213 public:
214
CvtColorIPPLoop_Invoker(const Mat & _src,Mat & _dst,const Cvt & _cvt,bool * _ok)215 CvtColorIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt, bool *_ok) :
216 ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt), ok(_ok)
217 {
218 *ok = true;
219 }
220
operator ()(const Range & range) const221 virtual void operator()(const Range& range) const
222 {
223 const void *yS = src.ptr<uchar>(range.start);
224 void *yD = dst.ptr<uchar>(range.start);
225 if( !cvt(yS, (int)src.step[0], yD, (int)dst.step[0], src.cols, range.end - range.start) )
226 *ok = false;
227 else
228 {
229 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
230 }
231 }
232
233 private:
234 const Mat& src;
235 Mat& dst;
236 const Cvt& cvt;
237 bool *ok;
238
239 const CvtColorIPPLoop_Invoker& operator= (const CvtColorIPPLoop_Invoker&);
240 };
241
242 template <typename Cvt>
CvtColorIPPLoop(const Mat & src,Mat & dst,const Cvt & cvt)243 bool CvtColorIPPLoop(const Mat& src, Mat& dst, const Cvt& cvt)
244 {
245 bool ok;
246 parallel_for_(Range(0, src.rows), CvtColorIPPLoop_Invoker<Cvt>(src, dst, cvt, &ok), src.total()/(double)(1<<16) );
247 return ok;
248 }
249
// Same as CvtColorIPPLoop, but safe for in-place calls (src.data == dst.data):
// IPP routines cannot operate on overlapping buffers, so the source is first
// deep-copied.
template <typename Cvt>
bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
{
    Mat temp;
    Mat &source = src;
    if( src.data == dst.data )
    {
        src.copyTo(temp);
        // NOTE: 'source' aliases 'src', so this assignment repoints src's
        // header at the deep copy (Mat assignment is a shallow, ref-counted
        // header copy); the copied data stays alive through src's reference.
        source = temp;
    }
    // 'ok' is written (set to true) by the invoker's constructor before
    // parallel_for_ runs, so it is never read uninitialized.
    bool ok;
    parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker<Cvt>(source, dst, cvt, &ok),
                  source.total()/(double)(1<<16) );
    return ok;
}
265
// Adapts ippiSwapChannels_8u_C3C4R (which takes an explicit alpha fill value)
// to the 6-argument ippiReorderFunc signature; alpha is filled with 255.
static IppStatus CV_STDCALL ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
{
    return ippiSwapChannels_8u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u);
}
271
// 16-bit variant of the adapter above; alpha is filled with 65535.
static IppStatus CV_STDCALL ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
{
    return ippiSwapChannels_16u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u);
}
277
// Float variant of the adapter above; alpha is filled with 1.0 (normalized max).
static IppStatus CV_STDCALL ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
{
    return ippiSwapChannels_32f_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f);
}
283
// The tables below are indexed by the OpenCV depth constant
// (CV_8U = 0, ..., CV_16U = 2, ..., CV_32F = 5); depths with no matching IPP
// primitive hold a null entry, which the functors treat as "not supported".
static ippiReorderFunc ippiSwapChannelsC3C4RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C3C4Rf, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3C4Rf, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C3C4Rf, 0, 0
};

static ippiGeneralFunc ippiCopyAC4C3RTab[] =
{
    (ippiGeneralFunc)ippiCopy_8u_AC4C3R, 0, (ippiGeneralFunc)ippiCopy_16u_AC4C3R, 0,
    0, (ippiGeneralFunc)ippiCopy_32f_AC4C3R, 0, 0
};

static ippiReorderFunc ippiSwapChannelsC4C3RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C4C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4C3R, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C4C3R, 0, 0
};

static ippiReorderFunc ippiSwapChannelsC3RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3R, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0
};

// In-place 4-channel swap only exists in IPP 8.0.1 and later.
#if IPP_VERSION_X100 >= 801
static ippiReorderFunc ippiSwapChannelsC4RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C4R, 0, 0
};
#endif

static ippiColor2GrayFunc ippiColor2GrayC3Tab[] =
{
    (ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R, 0,
    0, (ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R, 0, 0
};

static ippiColor2GrayFunc ippiColor2GrayC4Tab[] =
{
    (ippiColor2GrayFunc)ippiColorToGray_8u_AC4C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_AC4C1R, 0,
    0, (ippiColor2GrayFunc)ippiColorToGray_32f_AC4C1R, 0, 0
};

static ippiGeneralFunc ippiRGB2GrayC3Tab[] =
{
    (ippiGeneralFunc)ippiRGBToGray_8u_C3C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_C3C1R, 0,
    0, (ippiGeneralFunc)ippiRGBToGray_32f_C3C1R, 0, 0
};

static ippiGeneralFunc ippiRGB2GrayC4Tab[] =
{
    (ippiGeneralFunc)ippiRGBToGray_8u_AC4C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_AC4C1R, 0,
    0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0
};

static ippiGeneralFunc ippiCopyP3C3RTab[] =
{
    (ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0,
    0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0
};

static ippiGeneralFunc ippiRGB2XYZTab[] =
{
    (ippiGeneralFunc)ippiRGBToXYZ_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToXYZ_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiRGBToXYZ_32f_C3R, 0, 0
};

static ippiGeneralFunc ippiXYZ2RGBTab[] =
{
    (ippiGeneralFunc)ippiXYZToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiXYZToRGB_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiXYZToRGB_32f_C3R, 0, 0
};

// No 32f HSV primitives in IPP, hence the empty float slot.
static ippiGeneralFunc ippiRGB2HSVTab[] =
{
    (ippiGeneralFunc)ippiRGBToHSV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHSV_16u_C3R, 0,
    0, 0, 0, 0
};

static ippiGeneralFunc ippiHSV2RGBTab[] =
{
    (ippiGeneralFunc)ippiHSVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHSVToRGB_16u_C3R, 0,
    0, 0, 0, 0
};

static ippiGeneralFunc ippiRGB2HLSTab[] =
{
    (ippiGeneralFunc)ippiRGBToHLS_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHLS_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiRGBToHLS_32f_C3R, 0, 0
};

static ippiGeneralFunc ippiHLS2RGBTab[] =
{
    (ippiGeneralFunc)ippiHLSToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHLSToRGB_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiHLSToRGB_32f_C3R, 0, 0
};

// Deliberately disabled (note the trailing `&& 0`): the LUV tables are kept
// for reference but never compiled in.
#if !defined(HAVE_IPP_ICV_ONLY) && 0
static ippiGeneralFunc ippiRGBToLUVTab[] =
{
    (ippiGeneralFunc)ippiRGBToLUV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToLUV_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiRGBToLUV_32f_C3R, 0, 0
};

static ippiGeneralFunc ippiLUVToRGBTab[] =
{
    (ippiGeneralFunc)ippiLUVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiLUVToRGB_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiLUVToRGB_32f_C3R, 0, 0
};
#endif
395
396 struct IPPGeneralFunctor
397 {
IPPGeneralFunctorcv::IPPGeneralFunctor398 IPPGeneralFunctor(ippiGeneralFunc _func) : func(_func){}
operator ()cv::IPPGeneralFunctor399 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
400 {
401 return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0 : false;
402 }
403 private:
404 ippiGeneralFunc func;
405 };
406
407 struct IPPReorderFunctor
408 {
IPPReorderFunctorcv::IPPReorderFunctor409 IPPReorderFunctor(ippiReorderFunc _func, int _order0, int _order1, int _order2) : func(_func)
410 {
411 order[0] = _order0;
412 order[1] = _order1;
413 order[2] = _order2;
414 order[3] = 3;
415 }
operator ()cv::IPPReorderFunctor416 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
417 {
418 return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0 : false;
419 }
420 private:
421 ippiReorderFunc func;
422 int order[4];
423 };
424
// Wraps an ippiColor2GrayFunc with fixed luma weights.
struct IPPColor2GrayFunctor
{
    IPPColor2GrayFunctor(ippiColor2GrayFunc _func) :
        func(_func)
    {
        // BT.601 luma weights (0.299/0.587/0.114) stored with the blue weight
        // first.  NOTE(review): presumably this matches a BGR channel layout
        // at the call sites - confirm against the cvtColor dispatch code.
        coeffs[0] = 0.114f;
        coeffs[1] = 0.587f;
        coeffs[2] = 0.299f;
    }
    // Returns false when the depth has no IPP primitive (null func).
    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0 : false;
    }
private:
    ippiColor2GrayFunc func;
    Ipp32f coeffs[3];
};
442
443 struct IPPGray2BGRFunctor
444 {
IPPGray2BGRFunctorcv::IPPGray2BGRFunctor445 IPPGray2BGRFunctor(ippiGeneralFunc _func) :
446 func(_func)
447 {
448 }
449
operator ()cv::IPPGray2BGRFunctor450 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
451 {
452 if (func == 0)
453 return false;
454
455 const void* srcarray[3] = { src, src, src };
456 return func(srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
457 }
458 private:
459 ippiGeneralFunc func;
460 };
461
462 struct IPPGray2BGRAFunctor
463 {
IPPGray2BGRAFunctorcv::IPPGray2BGRAFunctor464 IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) :
465 func1(_func1), func2(_func2), depth(_depth)
466 {
467 }
468
operator ()cv::IPPGray2BGRAFunctor469 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
470 {
471 if (func1 == 0 || func2 == 0)
472 return false;
473
474 const void* srcarray[3] = { src, src, src };
475 Mat temp(rows, cols, CV_MAKETYPE(depth, 3));
476 if(func1(srcarray, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
477 return false;
478 int order[4] = {0, 1, 2, 3};
479 return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
480 }
481 private:
482 ippiGeneralFunc func1;
483 ippiReorderFunc func2;
484 int depth;
485 };
486
487 struct IPPReorderGeneralFunctor
488 {
IPPReorderGeneralFunctorcv::IPPReorderGeneralFunctor489 IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) :
490 func1(_func1), func2(_func2), depth(_depth)
491 {
492 order[0] = _order0;
493 order[1] = _order1;
494 order[2] = _order2;
495 order[3] = 3;
496 }
operator ()cv::IPPReorderGeneralFunctor497 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
498 {
499 if (func1 == 0 || func2 == 0)
500 return false;
501
502 Mat temp;
503 temp.create(rows, cols, CV_MAKETYPE(depth, 3));
504 if(func1(src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows), order) < 0)
505 return false;
506 return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows)) >= 0;
507 }
508 private:
509 ippiReorderFunc func1;
510 ippiGeneralFunc func2;
511 int order[4];
512 int depth;
513 };
514
515 struct IPPGeneralReorderFunctor
516 {
IPPGeneralReorderFunctorcv::IPPGeneralReorderFunctor517 IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) :
518 func1(_func1), func2(_func2), depth(_depth)
519 {
520 order[0] = _order0;
521 order[1] = _order1;
522 order[2] = _order2;
523 order[3] = 3;
524 }
operator ()cv::IPPGeneralReorderFunctor525 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
526 {
527 if (func1 == 0 || func2 == 0)
528 return false;
529
530 Mat temp;
531 temp.create(rows, cols, CV_MAKETYPE(depth, 3));
532 if(func1(src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
533 return false;
534 return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
535 }
536 private:
537 ippiGeneralFunc func1;
538 ippiReorderFunc func2;
539 int order[4];
540 int depth;
541 };
542
543 #endif
544
545 ////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
546
547 template<typename _Tp> struct RGB2RGB
548 {
549 typedef _Tp channel_type;
550
RGB2RGBcv::RGB2RGB551 RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) {}
operator ()cv::RGB2RGB552 void operator()(const _Tp* src, _Tp* dst, int n) const
553 {
554 int scn = srccn, dcn = dstcn, bidx = blueIdx;
555 if( dcn == 3 )
556 {
557 n *= 3;
558 for( int i = 0; i < n; i += 3, src += scn )
559 {
560 _Tp t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
561 dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
562 }
563 }
564 else if( scn == 3 )
565 {
566 n *= 3;
567 _Tp alpha = ColorChannel<_Tp>::max();
568 for( int i = 0; i < n; i += 3, dst += 4 )
569 {
570 _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2];
571 dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
572 }
573 }
574 else
575 {
576 n *= 4;
577 for( int i = 0; i < n; i += 4 )
578 {
579 _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
580 dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3;
581 }
582 }
583 }
584
585 int srccn, dstcn, blueIdx;
586 };
587
#if CV_NEON

// NEON-accelerated specialization of RGB2RGB for 8-bit channels.  Each branch
// processes 16 pixels per iteration with quad-register intrinsics, then 8
// pixels with double-register intrinsics, then a scalar tail loop identical
// to the generic template.
template<> struct RGB2RGB<uchar>
{
    typedef uchar channel_type;

    RGB2RGB(int _srccn, int _dstcn, int _blueIdx) :
        srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // Opaque alpha broadcast into 16- and 8-lane vectors for the 3->4 path.
        v_alpha = vdupq_n_u8(ColorChannel<uchar>::max());
        v_alpha2 = vget_low_u8(v_alpha);
    }

    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int scn = srccn, dcn = dstcn, bidx = blueIdx, i = 0;
        if (dcn == 3)
        {
            n *= 3;     // n becomes the destination element count
            if (scn == 3)
            {
                // 3 -> 3: pure channel reorder.
                for ( ; i <= n - 48; i += 48, src += 48 )
                {
                    uint8x16x3_t v_src = vld3q_u8(src), v_dst;
                    v_dst.val[0] = v_src.val[bidx];
                    v_dst.val[1] = v_src.val[1];
                    v_dst.val[2] = v_src.val[bidx ^ 2];
                    vst3q_u8(dst + i, v_dst);
                }
                for ( ; i <= n - 24; i += 24, src += 24 )
                {
                    uint8x8x3_t v_src = vld3_u8(src), v_dst;
                    v_dst.val[0] = v_src.val[bidx];
                    v_dst.val[1] = v_src.val[1];
                    v_dst.val[2] = v_src.val[bidx ^ 2];
                    vst3_u8(dst + i, v_dst);
                }
                for ( ; i < n; i += 3, src += 3 )
                {
                    uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
                    dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
                }
            }
            else
            {
                // 4 -> 3: reorder and drop the alpha channel.
                for ( ; i <= n - 48; i += 48, src += 64 )
                {
                    uint8x16x4_t v_src = vld4q_u8(src);
                    uint8x16x3_t v_dst;
                    v_dst.val[0] = v_src.val[bidx];
                    v_dst.val[1] = v_src.val[1];
                    v_dst.val[2] = v_src.val[bidx ^ 2];
                    vst3q_u8(dst + i, v_dst);
                }
                for ( ; i <= n - 24; i += 24, src += 32 )
                {
                    uint8x8x4_t v_src = vld4_u8(src);
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_src.val[bidx];
                    v_dst.val[1] = v_src.val[1];
                    v_dst.val[2] = v_src.val[bidx ^ 2];
                    vst3_u8(dst + i, v_dst);
                }
                for ( ; i < n; i += 3, src += 4 )
                {
                    uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
                    dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
                }
            }
        }
        else if (scn == 3)
        {
            // 3 -> 4: reorder and append a fully opaque alpha channel.
            n *= 3;     // n becomes the source element count
            for ( ; i <= n - 48; i += 48, dst += 64 )
            {
                uint8x16x3_t v_src = vld3q_u8(src + i);
                uint8x16x4_t v_dst;
                v_dst.val[bidx] = v_src.val[0];
                v_dst.val[1] = v_src.val[1];
                v_dst.val[bidx ^ 2] = v_src.val[2];
                v_dst.val[3] = v_alpha;
                vst4q_u8(dst, v_dst);
            }
            for ( ; i <= n - 24; i += 24, dst += 32 )
            {
                uint8x8x3_t v_src = vld3_u8(src + i);
                uint8x8x4_t v_dst;
                v_dst.val[bidx] = v_src.val[0];
                v_dst.val[1] = v_src.val[1];
                v_dst.val[bidx ^ 2] = v_src.val[2];
                v_dst.val[3] = v_alpha2;
                vst4_u8(dst, v_dst);
            }
            uchar alpha = ColorChannel<uchar>::max();
            for (; i < n; i += 3, dst += 4 )
            {
                uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2];
                dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
            }
        }
        else
        {
            // 4 -> 4: swap channels 0 and 2, keep green and alpha in place.
            n *= 4;
            for ( ; i <= n - 64; i += 64 )
            {
                uint8x16x4_t v_src = vld4q_u8(src + i), v_dst;
                v_dst.val[0] = v_src.val[2];
                v_dst.val[1] = v_src.val[1];
                v_dst.val[2] = v_src.val[0];
                v_dst.val[3] = v_src.val[3];
                vst4q_u8(dst + i, v_dst);
            }
            for ( ; i <= n - 32; i += 32 )
            {
                uint8x8x4_t v_src = vld4_u8(src + i), v_dst;
                v_dst.val[0] = v_src.val[2];
                v_dst.val[1] = v_src.val[1];
                v_dst.val[2] = v_src.val[0];
                v_dst.val[3] = v_src.val[3];
                vst4_u8(dst + i, v_dst);
            }
            for ( ; i < n; i += 4)
            {
                uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
                dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3;
            }
        }
    }

    int srccn, dstcn, blueIdx;

    uint8x16_t v_alpha;
    uint8x8_t v_alpha2;
};

#endif
724
725 /////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
726
// Unpacks 16-bit RGB (565 when greenBits == 6, or 1555 when greenBits == 5)
// into 8-bit 3- or 4-channel pixels.  Low bits of each expanded channel are
// left as zero; for 1555 with a 4-channel destination the top bit becomes a
// 0/255 alpha.
struct RGB5x52RGB
{
    typedef uchar channel_type;

    RGB5x52RGB(int _dstcn, int _blueIdx, int _greenBits)
        : dstcn(_dstcn), blueIdx(_blueIdx), greenBits(_greenBits)
    {
#if CV_NEON
        // Bit masks and constants used by the vectorized loops below.
        v_n3 = vdupq_n_u16(~3);
        v_n7 = vdupq_n_u16(~7);
        v_255 = vdupq_n_u8(255);
        v_0 = vdupq_n_u8(0);
        v_mask = vdupq_n_u16(0x8000);   // alpha bit of the 1555 format
#endif
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        if( greenBits == 6 )
        {
            // 565: 5 bits blue, 6 bits green, 5 bits red.
#if CV_NEON
            for ( ; i <= n - 16; i += 16, dst += dcn * 16)
            {
                uint16x8_t v_src0 = vld1q_u16((const ushort *)src + i), v_src1 = vld1q_u16((const ushort *)src + i + 8);
                uint8x16_t v_b = vcombine_u8(vmovn_u16(vshlq_n_u16(v_src0, 3)), vmovn_u16(vshlq_n_u16(v_src1, 3)));
                uint8x16_t v_g = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 3), v_n3)),
                                             vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 3), v_n3)));
                uint8x16_t v_r = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 8), v_n7)),
                                             vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 8), v_n7)));
                if (dcn == 3)
                {
                    uint8x16x3_t v_dst;
                    v_dst.val[bidx] = v_b;
                    v_dst.val[1] = v_g;
                    v_dst.val[bidx^2] = v_r;
                    vst3q_u8(dst, v_dst);
                }
                else
                {
                    uint8x16x4_t v_dst;
                    v_dst.val[bidx] = v_b;
                    v_dst.val[1] = v_g;
                    v_dst.val[bidx^2] = v_r;
                    v_dst.val[3] = v_255;   // 565 has no alpha: force opaque
                    vst4q_u8(dst, v_dst);
                }
            }
#endif
            // Scalar tail: shift each field up to the top of an 8-bit channel.
            for( ; i < n; i++, dst += dcn )
            {
                unsigned t = ((const ushort*)src)[i];
                dst[bidx] = (uchar)(t << 3);
                dst[1] = (uchar)((t >> 3) & ~3);
                dst[bidx ^ 2] = (uchar)((t >> 8) & ~7);
                if( dcn == 4 )
                    dst[3] = 255;
            }
        }
        else
        {
            // 1555: 5 bits per color, top bit is a 1-bit alpha.
#if CV_NEON
            for ( ; i <= n - 16; i += 16, dst += dcn * 16)
            {
                uint16x8_t v_src0 = vld1q_u16((const ushort *)src + i), v_src1 = vld1q_u16((const ushort *)src + i + 8);
                uint8x16_t v_b = vcombine_u8(vmovn_u16(vshlq_n_u16(v_src0, 3)), vmovn_u16(vshlq_n_u16(v_src1, 3)));
                uint8x16_t v_g = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 2), v_n7)),
                                             vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 2), v_n7)));
                uint8x16_t v_r = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 7), v_n7)),
                                             vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 7), v_n7)));
                if (dcn == 3)
                {
                    uint8x16x3_t v_dst;
                    v_dst.val[bidx] = v_b;
                    v_dst.val[1] = v_g;
                    v_dst.val[bidx^2] = v_r;
                    vst3q_u8(dst, v_dst);
                }
                else
                {
                    uint8x16x4_t v_dst;
                    v_dst.val[bidx] = v_b;
                    v_dst.val[1] = v_g;
                    v_dst.val[bidx^2] = v_r;
                    // Expand the 1-bit alpha to 0 or 255 per pixel.
                    v_dst.val[3] = vbslq_u8(vcombine_u8(vqmovn_u16(vandq_u16(v_src0, v_mask)),
                                                        vqmovn_u16(vandq_u16(v_src1, v_mask))), v_255, v_0);
                    vst4q_u8(dst, v_dst);
                }
            }
#endif
            for( ; i < n; i++, dst += dcn )
            {
                unsigned t = ((const ushort*)src)[i];
                dst[bidx] = (uchar)(t << 3);
                dst[1] = (uchar)((t >> 2) & ~7);
                dst[bidx ^ 2] = (uchar)((t >> 7) & ~7);
                if( dcn == 4 )
                    dst[3] = t & 0x8000 ? 255 : 0;
            }
        }
    }

    int dstcn, blueIdx, greenBits;
#if CV_NEON
    uint16x8_t v_n3, v_n7, v_mask;
    uint8x16_t v_255, v_0;
#endif
};
835
836
// Packs 8-bit 3- or 4-channel pixels into 16-bit RGB (565 when
// greenBits == 6, or 1555 when greenBits == 5).  Each channel keeps its top
// bits; for 1555 with a 4-channel source, any nonzero alpha sets the top bit.
struct RGB2RGB5x5
{
    typedef uchar channel_type;

    RGB2RGB5x5(int _srccn, int _blueIdx, int _greenBits)
        : srccn(_srccn), blueIdx(_blueIdx), greenBits(_greenBits)
    {
#if CV_NEON
        // Bit masks and constants used by the vectorized loops below.
        v_n3 = vdup_n_u8(~3);
        v_n7 = vdup_n_u8(~7);
        v_mask = vdupq_n_u16(0x8000);   // alpha bit of the 1555 format
        v_0 = vdupq_n_u16(0);
        v_full = vdupq_n_u16(0xffff);
#endif
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        if (greenBits == 6)
        {
            // 565 packing: 5 bits blue, 6 bits green, 5 bits red.
            if (scn == 3)
            {
#if CV_NEON
                for ( ; i <= n - 8; i += 8, src += 24 )
                {
                    uint8x8x3_t v_src = vld3_u8(src);
                    uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
                    v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3));
                    v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8));
                    vst1q_u16((ushort *)dst + i, v_dst);
                }
#endif
                for ( ; i < n; i++, src += 3 )
                    ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
            }
            else
            {
                // 4-channel source: alpha is simply dropped.
#if CV_NEON
                for ( ; i <= n - 8; i += 8, src += 32 )
                {
                    uint8x8x4_t v_src = vld4_u8(src);
                    uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
                    v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3));
                    v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8));
                    vst1q_u16((ushort *)dst + i, v_dst);
                }
#endif
                for ( ; i < n; i++, src += 4 )
                    ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
            }
        }
        else if (scn == 3)
        {
            // 1555 packing from a 3-channel source: alpha bit left clear.
#if CV_NEON
            for ( ; i <= n - 8; i += 8, src += 24 )
            {
                uint8x8x3_t v_src = vld3_u8(src);
                uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
                v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2));
                v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7));
                vst1q_u16((ushort *)dst + i, v_dst);
            }
#endif
            for ( ; i < n; i++, src += 3 )
                ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7));
        }
        else
        {
            // 1555 packing from a 4-channel source: nonzero alpha -> bit 15.
#if CV_NEON
            for ( ; i <= n - 8; i += 8, src += 32 )
            {
                uint8x8x4_t v_src = vld4_u8(src);
                uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
                v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2));
                v_dst = vorrq_u16(v_dst, vorrq_u16(vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7),
                                                   vbslq_u16(veorq_u16(vceqq_u16(vmovl_u8(v_src.val[3]), v_0), v_full), v_mask, v_0)));
                vst1q_u16((ushort *)dst + i, v_dst);
            }
#endif
            for ( ; i < n; i++, src += 4 )
                ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|
                    ((src[bidx^2]&~7) << 7)|(src[3] ? 0x8000 : 0));
        }
    }

    int srccn, blueIdx, greenBits;
#if CV_NEON
    uint8x8_t v_n3, v_n7;
    uint16x8_t v_mask, v_0, v_full;
#endif
};
929
930 ///////////////////////////////// Color to/from Grayscale ////////////////////////////////
931
932 template<typename _Tp>
933 struct Gray2RGB
934 {
935 typedef _Tp channel_type;
936
Gray2RGBcv::Gray2RGB937 Gray2RGB(int _dstcn) : dstcn(_dstcn) {}
operator ()cv::Gray2RGB938 void operator()(const _Tp* src, _Tp* dst, int n) const
939 {
940 if( dstcn == 3 )
941 for( int i = 0; i < n; i++, dst += 3 )
942 {
943 dst[0] = dst[1] = dst[2] = src[i];
944 }
945 else
946 {
947 _Tp alpha = ColorChannel<_Tp>::max();
948 for( int i = 0; i < n; i++, dst += 4 )
949 {
950 dst[0] = dst[1] = dst[2] = src[i];
951 dst[3] = alpha;
952 }
953 }
954 }
955
956 int dstcn;
957 };
958
959
960 struct Gray2RGB5x5
961 {
962 typedef uchar channel_type;
963
Gray2RGB5x5cv::Gray2RGB5x5964 Gray2RGB5x5(int _greenBits) : greenBits(_greenBits)
965 {
966 #if CV_NEON
967 v_n7 = vdup_n_u8(~7);
968 v_n3 = vdup_n_u8(~3);
969 #elif CV_SSE2
970 haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
971 v_n7 = _mm_set1_epi16(~7);
972 v_n3 = _mm_set1_epi16(~3);
973 v_zero = _mm_setzero_si128();
974 #endif
975 }
976
operator ()cv::Gray2RGB5x5977 void operator()(const uchar* src, uchar* dst, int n) const
978 {
979 int i = 0;
980 if( greenBits == 6 )
981 {
982 #if CV_NEON
983 for ( ; i <= n - 8; i += 8 )
984 {
985 uint8x8_t v_src = vld1_u8(src + i);
986 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src, 3));
987 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n3)), 3));
988 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n7)), 8));
989 vst1q_u16((ushort *)dst + i, v_dst);
990 }
991 #elif CV_SSE2
992 if (haveSIMD)
993 {
994 for ( ; i <= n - 16; i += 16 )
995 {
996 __m128i v_src = _mm_loadu_si128((__m128i const *)(src + i));
997
998 __m128i v_src_p = _mm_unpacklo_epi8(v_src, v_zero);
999 __m128i v_dst = _mm_or_si128(_mm_srli_epi16(v_src_p, 3),
1000 _mm_or_si128(_mm_slli_epi16(_mm_and_si128(v_src_p, v_n3), 3),
1001 _mm_slli_epi16(_mm_and_si128(v_src_p, v_n7), 8)));
1002 _mm_storeu_si128((__m128i *)((ushort *)dst + i), v_dst);
1003
1004 v_src_p = _mm_unpackhi_epi8(v_src, v_zero);
1005 v_dst = _mm_or_si128(_mm_srli_epi16(v_src_p, 3),
1006 _mm_or_si128(_mm_slli_epi16(_mm_and_si128(v_src_p, v_n3), 3),
1007 _mm_slli_epi16(_mm_and_si128(v_src_p, v_n7), 8)));
1008 _mm_storeu_si128((__m128i *)((ushort *)dst + i + 8), v_dst);
1009 }
1010 }
1011 #endif
1012 for ( ; i < n; i++ )
1013 {
1014 int t = src[i];
1015 ((ushort*)dst)[i] = (ushort)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8));
1016 }
1017 }
1018 else
1019 {
1020 #if CV_NEON
1021 for ( ; i <= n - 8; i += 8 )
1022 {
1023 uint16x8_t v_src = vmovl_u8(vshr_n_u8(vld1_u8(src + i), 3));
1024 uint16x8_t v_dst = vorrq_u16(vorrq_u16(v_src, vshlq_n_u16(v_src, 5)), vshlq_n_u16(v_src, 10));
1025 vst1q_u16((ushort *)dst + i, v_dst);
1026 }
1027 #elif CV_SSE2
1028 if (haveSIMD)
1029 {
1030 for ( ; i <= n - 16; i += 8 )
1031 {
1032 __m128i v_src = _mm_loadu_si128((__m128i const *)(src + i));
1033
1034 __m128i v_src_p = _mm_srli_epi16(_mm_unpacklo_epi8(v_src, v_zero), 3);
1035 __m128i v_dst = _mm_or_si128(v_src_p,
1036 _mm_or_si128(_mm_slli_epi32(v_src_p, 5),
1037 _mm_slli_epi16(v_src_p, 10)));
1038 _mm_storeu_si128((__m128i *)((ushort *)dst + i), v_dst);
1039
1040 v_src_p = _mm_srli_epi16(_mm_unpackhi_epi8(v_src, v_zero), 3);
1041 v_dst = _mm_or_si128(v_src_p,
1042 _mm_or_si128(_mm_slli_epi16(v_src_p, 5),
1043 _mm_slli_epi16(v_src_p, 10)));
1044 _mm_storeu_si128((__m128i *)((ushort *)dst + i + 8), v_dst);
1045 }
1046 }
1047 #endif
1048 for( ; i < n; i++ )
1049 {
1050 int t = src[i] >> 3;
1051 ((ushort*)dst)[i] = (ushort)(t|(t << 5)|(t << 10));
1052 }
1053 }
1054 }
1055 int greenBits;
1056
1057 #if CV_NEON
1058 uint8x8_t v_n7, v_n3;
1059 #elif CV_SSE2
1060 __m128i v_n7, v_n3, v_zero;
1061 bool haveSIMD;
1062 #endif
1063 };
1064
1065
#undef R2Y
#undef G2Y
#undef B2Y

// Fixed-point constants shared by the integer color-conversion code below.
enum
{
    yuv_shift = 14,   // fraction bits for the integer RGB<->YUV/gray arithmetic
    xyz_shift = 12,   // fraction bits for the integer RGB<->XYZ arithmetic
    R2Y = 4899,       // round(0.299 * (1 << yuv_shift)) — luma weight for red
    G2Y = 9617,       // round(0.587 * (1 << yuv_shift)) — luma weight for green
    B2Y = 1868,       // round(0.114 * (1 << yuv_shift)) — luma weight for blue
    BLOCK_SIZE = 256
};
1079
1080
// Converts packed 16-bit RGB555/RGB565 pixels to 8-bit gray using the
// fixed-point weights B2Y/G2Y/R2Y (scaled by 1 << yuv_shift).
// greenBits == 6 selects the 5-6-5 bit layout, otherwise 5-5-5 is assumed.
struct RGB5x52Gray
{
    typedef uchar channel_type;

    RGB5x52Gray(int _greenBits) : greenBits(_greenBits)
    {
#if CV_NEON
        v_b2y = vdup_n_u16(B2Y);
        v_g2y = vdup_n_u16(G2Y);
        v_r2y = vdup_n_u16(R2Y);
        v_delta = vdupq_n_u32(1 << (yuv_shift - 1));  // rounding term for the final shift
        v_f8 = vdupq_n_u16(0xf8);  // mask for a 5-bit field expanded to 8 bits
        v_fc = vdupq_n_u16(0xfc);  // mask for the 6-bit green expanded to 8 bits
#elif CV_SSE2
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
        v_b2y = _mm_set1_epi16(B2Y);
        v_g2y = _mm_set1_epi16(G2Y);
        v_r2y = _mm_set1_epi16(R2Y);
        v_delta = _mm_set1_epi32(1 << (yuv_shift - 1));  // rounding term for the final shift
        v_f8 = _mm_set1_epi16(0xf8);  // mask for a 5-bit field expanded to 8 bits
        v_fc = _mm_set1_epi16(0xfc);  // mask for the 6-bit green expanded to 8 bits
#endif
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i = 0;
        if( greenBits == 6 )
        {
#if CV_NEON
            // 8 pixels per iteration: expand the three fields to 8-bit range,
            // take the widening dot product with the weights, descale with rounding.
            for ( ; i <= n - 8; i += 8)
            {
                uint16x8_t v_src = vld1q_u16((ushort *)src + i);
                uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8),  // low 5 bits -> weighted by B2Y
                           v_t1 = vandq_u16(vshrq_n_u16(v_src, 3), v_fc),  // middle 6 bits -> weighted by G2Y
                           v_t2 = vandq_u16(vshrq_n_u16(v_src, 8), v_f8);  // high 5 bits -> weighted by R2Y

                uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y),
                    vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y);
                uint32x4_t v_dst1 = vmlal_u16(vmlal_u16(vmull_u16(vget_high_u16(v_t0), v_b2y),
                    vget_high_u16(v_t1), v_g2y), vget_high_u16(v_t2), v_r2y);
                v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_delta), yuv_shift);
                v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_delta), yuv_shift);

                vst1_u8(dst + i, vmovn_u16(vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1))));
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                __m128i v_zero = _mm_setzero_si128();

                for ( ; i <= n - 8; i += 8)
                {
                    __m128i v_src = _mm_loadu_si128((__m128i const *)((ushort *)src + i));
                    __m128i v_t0 = _mm_and_si128(_mm_slli_epi16(v_src, 3), v_f8),  // low 5 bits -> weighted by B2Y
                            v_t1 = _mm_and_si128(_mm_srli_epi16(v_src, 3), v_fc),  // middle 6 bits -> weighted by G2Y
                            v_t2 = _mm_and_si128(_mm_srli_epi16(v_src, 8), v_f8);  // high 5 bits -> weighted by R2Y

                    // Full 16x16 -> 32-bit products reassembled from lo/hi halves
                    // (operands are <= 0xf8 and the weights are positive, so the
                    // signed mulhi never sees a negative operand).
                    __m128i v_mullo_b = _mm_mullo_epi16(v_t0, v_b2y);
                    __m128i v_mullo_g = _mm_mullo_epi16(v_t1, v_g2y);
                    __m128i v_mullo_r = _mm_mullo_epi16(v_t2, v_r2y);
                    __m128i v_mulhi_b = _mm_mulhi_epi16(v_t0, v_b2y);
                    __m128i v_mulhi_g = _mm_mulhi_epi16(v_t1, v_g2y);
                    __m128i v_mulhi_r = _mm_mulhi_epi16(v_t2, v_r2y);

                    __m128i v_dst0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_b, v_mulhi_b),
                                                   _mm_unpacklo_epi16(v_mullo_g, v_mulhi_g));
                    v_dst0 = _mm_add_epi32(_mm_add_epi32(v_dst0, v_delta),
                                           _mm_unpacklo_epi16(v_mullo_r, v_mulhi_r));

                    __m128i v_dst1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_b, v_mulhi_b),
                                                   _mm_unpackhi_epi16(v_mullo_g, v_mulhi_g));
                    v_dst1 = _mm_add_epi32(_mm_add_epi32(v_dst1, v_delta),
                                           _mm_unpackhi_epi16(v_mullo_r, v_mulhi_r));

                    v_dst0 = _mm_srli_epi32(v_dst0, yuv_shift);
                    v_dst1 = _mm_srli_epi32(v_dst1, yuv_shift);

                    __m128i v_dst = _mm_packs_epi32(v_dst0, v_dst1);
                    _mm_storel_epi64((__m128i *)(dst + i), _mm_packus_epi16(v_dst, v_zero));
                }
            }
#endif
            // Scalar tail for the 5-6-5 layout; matches the SIMD unpacking above.
            for ( ; i < n; i++)
            {
                int t = ((ushort*)src)[i];
                dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
                                           ((t >> 3) & 0xfc)*G2Y +
                                           ((t >> 8) & 0xf8)*R2Y, yuv_shift);
            }
        }
        else
        {
#if CV_NEON
            // Same scheme for 5-5-5: all three fields expand with the 0xf8 mask.
            for ( ; i <= n - 8; i += 8)
            {
                uint16x8_t v_src = vld1q_u16((ushort *)src + i);
                uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8),
                           v_t1 = vandq_u16(vshrq_n_u16(v_src, 2), v_f8),
                           v_t2 = vandq_u16(vshrq_n_u16(v_src, 7), v_f8);

                uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y),
                    vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y);
                uint32x4_t v_dst1 = vmlal_u16(vmlal_u16(vmull_u16(vget_high_u16(v_t0), v_b2y),
                    vget_high_u16(v_t1), v_g2y), vget_high_u16(v_t2), v_r2y);
                v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_delta), yuv_shift);
                v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_delta), yuv_shift);

                vst1_u8(dst + i, vmovn_u16(vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1))));
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                __m128i v_zero = _mm_setzero_si128();

                for ( ; i <= n - 8; i += 8)
                {
                    __m128i v_src = _mm_loadu_si128((__m128i const *)((ushort *)src + i));
                    __m128i v_t0 = _mm_and_si128(_mm_slli_epi16(v_src, 3), v_f8),
                            v_t1 = _mm_and_si128(_mm_srli_epi16(v_src, 2), v_f8),
                            v_t2 = _mm_and_si128(_mm_srli_epi16(v_src, 7), v_f8);

                    __m128i v_mullo_b = _mm_mullo_epi16(v_t0, v_b2y);
                    __m128i v_mullo_g = _mm_mullo_epi16(v_t1, v_g2y);
                    __m128i v_mullo_r = _mm_mullo_epi16(v_t2, v_r2y);
                    __m128i v_mulhi_b = _mm_mulhi_epi16(v_t0, v_b2y);
                    __m128i v_mulhi_g = _mm_mulhi_epi16(v_t1, v_g2y);
                    __m128i v_mulhi_r = _mm_mulhi_epi16(v_t2, v_r2y);

                    __m128i v_dst0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_b, v_mulhi_b),
                                                   _mm_unpacklo_epi16(v_mullo_g, v_mulhi_g));
                    v_dst0 = _mm_add_epi32(_mm_add_epi32(v_dst0, v_delta),
                                           _mm_unpacklo_epi16(v_mullo_r, v_mulhi_r));

                    __m128i v_dst1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_b, v_mulhi_b),
                                                   _mm_unpackhi_epi16(v_mullo_g, v_mulhi_g));
                    v_dst1 = _mm_add_epi32(_mm_add_epi32(v_dst1, v_delta),
                                           _mm_unpackhi_epi16(v_mullo_r, v_mulhi_r));

                    v_dst0 = _mm_srli_epi32(v_dst0, yuv_shift);
                    v_dst1 = _mm_srli_epi32(v_dst1, yuv_shift);

                    __m128i v_dst = _mm_packs_epi32(v_dst0, v_dst1);
                    _mm_storel_epi64((__m128i *)(dst + i), _mm_packus_epi16(v_dst, v_zero));
                }
            }
#endif
            // Scalar tail for the 5-5-5 layout.
            for ( ; i < n; i++)
            {
                int t = ((ushort*)src)[i];
                dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
                                           ((t >> 2) & 0xf8)*G2Y +
                                           ((t >> 7) & 0xf8)*R2Y, yuv_shift);
            }
        }
    }
    int greenBits;

#if CV_NEON
    uint16x4_t v_b2y, v_g2y, v_r2y;
    uint32x4_t v_delta;
    uint16x8_t v_f8, v_fc;
#elif CV_SSE2
    bool haveSIMD;
    __m128i v_b2y, v_g2y, v_r2y;
    __m128i v_delta;
    __m128i v_f8, v_fc;
#endif
};
1250
1251
1252 template<typename _Tp> struct RGB2Gray
1253 {
1254 typedef _Tp channel_type;
1255
RGB2Graycv::RGB2Gray1256 RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
1257 {
1258 static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
1259 memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
1260 if(blueIdx == 0)
1261 std::swap(coeffs[0], coeffs[2]);
1262 }
1263
operator ()cv::RGB2Gray1264 void operator()(const _Tp* src, _Tp* dst, int n) const
1265 {
1266 int scn = srccn;
1267 float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
1268 for(int i = 0; i < n; i++, src += scn)
1269 dst[i] = saturate_cast<_Tp>(src[0]*cb + src[1]*cg + src[2]*cr);
1270 }
1271 int srccn;
1272 float coeffs[3];
1273 };
1274
1275 template<> struct RGB2Gray<uchar>
1276 {
1277 typedef uchar channel_type;
1278
RGB2Graycv::RGB2Gray1279 RGB2Gray(int _srccn, int blueIdx, const int* coeffs) : srccn(_srccn)
1280 {
1281 const int coeffs0[] = { R2Y, G2Y, B2Y };
1282 if(!coeffs) coeffs = coeffs0;
1283
1284 int b = 0, g = 0, r = (1 << (yuv_shift-1));
1285 int db = coeffs[blueIdx^2], dg = coeffs[1], dr = coeffs[blueIdx];
1286
1287 for( int i = 0; i < 256; i++, b += db, g += dg, r += dr )
1288 {
1289 tab[i] = b;
1290 tab[i+256] = g;
1291 tab[i+512] = r;
1292 }
1293 }
operator ()cv::RGB2Gray1294 void operator()(const uchar* src, uchar* dst, int n) const
1295 {
1296 int scn = srccn;
1297 const int* _tab = tab;
1298 for(int i = 0; i < n; i++, src += scn)
1299 dst[i] = (uchar)((_tab[src[0]] + _tab[src[1]+256] + _tab[src[2]+512]) >> yuv_shift);
1300 }
1301 int srccn;
1302 int tab[256*3];
1303 };
1304
1305 #if CV_NEON
1306
template <>
struct RGB2Gray<ushort>
{
    typedef ushort channel_type;

    // NEON specialization: integer dot product with the fixed-point luma
    // weights. blueIdx == 0 (blue-first input) swaps the blue/red weights so
    // coeffs[k] always applies to source channel k.
    RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) :
        srccn(_srccn)
    {
        static const int coeffs0[] = { R2Y, G2Y, B2Y };
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
        if( blueIdx == 0 )
            std::swap(coeffs[0], coeffs[2]);

        v_cb = vdup_n_u16(coeffs[0]);
        v_cg = vdup_n_u16(coeffs[1]);
        v_cr = vdup_n_u16(coeffs[2]);
        v_delta = vdupq_n_u32(1 << (yuv_shift - 1));  // rounding term for the descale
    }

    void operator()(const ushort* src, ushort* dst, int n) const
    {
        int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2], i = 0;

        // 8 pixels per iteration; vld3q/vld4q deinterleave the 3-/4-channel input
        // (the 4th channel, if any, is simply ignored).
        for ( ; i <= n - 8; i += 8, src += scn * 8)
        {
            uint16x8_t v_b, v_r, v_g;
            if (scn == 3)
            {
                uint16x8x3_t v_src = vld3q_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }
            else
            {
                uint16x8x4_t v_src = vld4q_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }

            // Widening multiply-accumulate: ch0*c0 + ch1*c1 + ch2*c2 in 32 bits.
            uint32x4_t v_dst0_ = vmlal_u16(vmlal_u16(
                                 vmull_u16(vget_low_u16(v_b), v_cb),
                                           vget_low_u16(v_g), v_cg),
                                           vget_low_u16(v_r), v_cr);
            uint32x4_t v_dst1_ = vmlal_u16(vmlal_u16(
                                 vmull_u16(vget_high_u16(v_b), v_cb),
                                           vget_high_u16(v_g), v_cg),
                                           vget_high_u16(v_r), v_cr);

            // Add the rounding term, shift back to the ushort range and narrow.
            uint16x4_t v_dst0 = vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst0_, v_delta), yuv_shift));
            uint16x4_t v_dst1 = vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst1_, v_delta), yuv_shift));

            vst1q_u16(dst + i, vcombine_u16(v_dst0, v_dst1));
        }

        // Same computation, 4 pixels at a time.
        for ( ; i <= n - 4; i += 4, src += scn * 4)
        {
            uint16x4_t v_b, v_r, v_g;
            if (scn == 3)
            {
                uint16x4x3_t v_src = vld3_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }
            else
            {
                uint16x4x4_t v_src = vld4_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }

            uint32x4_t v_dst = vmlal_u16(vmlal_u16(
                               vmull_u16(v_b, v_cb),
                                         v_g, v_cg),
                                         v_r, v_cr);

            vst1_u16(dst + i, vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst, v_delta), yuv_shift)));
        }

        // Scalar tail — identical arithmetic to the vector paths.
        for( ; i < n; i++, src += scn)
            dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
    }

    int srccn, coeffs[3];
    uint16x4_t v_cb, v_cg, v_cr;
    uint32x4_t v_delta;
};
1397
template <>
struct RGB2Gray<float>
{
    typedef float channel_type;

    // NEON specialization of the float RGB->gray conversion.
    // blueIdx == 0 (blue-first input) swaps the blue/red weights so
    // coeffs[k] always applies to source channel k.
    RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
        memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
        if(blueIdx == 0)
            std::swap(coeffs[0], coeffs[2]);

        v_cb = vdupq_n_f32(coeffs[0]);
        v_cg = vdupq_n_f32(coeffs[1]);
        v_cr = vdupq_n_f32(coeffs[2]);
    }

    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, i = 0;
        float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];

        if (scn == 3)
        {
            // vld3q deinterleaves the 3-channel input; 8 pixels per iteration
            // (two 4-wide fused-style multiply-accumulate chains).
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                float32x4x3_t v_src = vld3q_f32(src);
                vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));

                v_src = vld3q_f32(src + scn * 4);
                vst1q_f32(dst + i + 4, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }

            // 4-pixel cleanup with the same arithmetic.
            for ( ; i <= n - 4; i += 4, src += scn * 4)
            {
                float32x4x3_t v_src = vld3q_f32(src);
                vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }
        }
        else
        {
            // 4-channel input: vld4q deinterleaves; the alpha plane is ignored.
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                float32x4x4_t v_src = vld4q_f32(src);
                vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));

                v_src = vld4q_f32(src + scn * 4);
                vst1q_f32(dst + i + 4, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }

            for ( ; i <= n - 4; i += 4, src += scn * 4)
            {
                float32x4x4_t v_src = vld4q_f32(src);
                vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }
        }

        // Scalar tail.
        for ( ; i < n; i++, src += scn)
            dst[i] = src[0]*cb + src[1]*cg + src[2]*cr;
    }

    int srccn;
    float coeffs[3];
    float32x4_t v_cb, v_cg, v_cr;
};
1463
1464 #elif CV_SSE2
1465
1466 #if CV_SSE4_1
1467
template <>
struct RGB2Gray<ushort>
{
    typedef ushort channel_type;

    // SSE4.1 specialization (needs _mm_packus_epi32 in process()).
    // blueIdx == 0 (blue-first input) swaps the blue/red weights so
    // coeffs[k] always applies to source channel k.
    RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) :
        srccn(_srccn)
    {
        static const int coeffs0[] = { R2Y, G2Y, B2Y };
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
        if( blueIdx == 0 )
            std::swap(coeffs[0], coeffs[2]);

        v_cb = _mm_set1_epi16((short)coeffs[0]);
        v_cg = _mm_set1_epi16((short)coeffs[1]);
        v_cr = _mm_set1_epi16((short)coeffs[2]);
        v_delta = _mm_set1_epi32(1 << (yuv_shift - 1));  // rounding term for the descale

        haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1);
    }

    // 16s x 8
    // Weighted sum of three deinterleaved channel registers. The first
    // parameter is multiplied by coeffs[0], the second by coeffs[1], the
    // third by coeffs[2] — callers pass channel 0/1/2 in that order, matching
    // the scalar tail (src[0]*cb + src[1]*cg + src[2]*cr).
    void process(__m128i v_b, __m128i v_g, __m128i v_r,
                 __m128i & v_gray) const
    {
        // Full 16x16 -> 32-bit unsigned products from mullo/mulhi pairs.
        __m128i v_mullo_r = _mm_mullo_epi16(v_r, v_cr);
        __m128i v_mullo_g = _mm_mullo_epi16(v_g, v_cg);
        __m128i v_mullo_b = _mm_mullo_epi16(v_b, v_cb);
        __m128i v_mulhi_r = _mm_mulhi_epu16(v_r, v_cr);
        __m128i v_mulhi_g = _mm_mulhi_epu16(v_g, v_cg);
        __m128i v_mulhi_b = _mm_mulhi_epu16(v_b, v_cb);

        __m128i v_gray0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_r, v_mulhi_r),
                                        _mm_unpacklo_epi16(v_mullo_g, v_mulhi_g));
        v_gray0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_b, v_mulhi_b), v_gray0);
        v_gray0 = _mm_srli_epi32(_mm_add_epi32(v_gray0, v_delta), yuv_shift);

        __m128i v_gray1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_r, v_mulhi_r),
                                        _mm_unpackhi_epi16(v_mullo_g, v_mulhi_g));
        v_gray1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_b, v_mulhi_b), v_gray1);
        v_gray1 = _mm_srli_epi32(_mm_add_epi32(v_gray1, v_delta), yuv_shift);

        // SSE4.1: unsigned saturating pack back to 8 ushorts.
        v_gray = _mm_packus_epi32(v_gray0, v_gray1);
    }

    void operator()(const ushort* src, ushort* dst, int n) const
    {
        int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2], i = 0;

        if (scn == 3 && haveSIMD)
        {
            // 16 pixels per iteration: load 6 registers of interleaved data,
            // deinterleave into channel planes, then process two 8-pixel halves.
            // (The v_r*/v_g*/v_b* names denote register order before
            // deinterleaving, not color planes.)
            for ( ; i <= n - 16; i += 16, src += scn * 16)
            {
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 8));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 24));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 40));

                _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128i v_gray0;
                process(v_r0, v_g0, v_b0,
                        v_gray0);

                __m128i v_gray1;
                process(v_r1, v_g1, v_b1,
                        v_gray1);

                _mm_storeu_si128((__m128i *)(dst + i), v_gray0);
                _mm_storeu_si128((__m128i *)(dst + i + 8), v_gray1);
            }
        }
        else if (scn == 4 && haveSIMD)
        {
            // 4-channel variant: 8 registers in, alpha planes discarded.
            for ( ; i <= n - 16; i += 16, src += scn * 16)
            {
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 8));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 24));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 40));
                __m128i v_a0 = _mm_loadu_si128((__m128i const *)(src + 48));
                __m128i v_a1 = _mm_loadu_si128((__m128i const *)(src + 56));

                _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1);

                __m128i v_gray0;
                process(v_r0, v_g0, v_b0,
                        v_gray0);

                __m128i v_gray1;
                process(v_r1, v_g1, v_b1,
                        v_gray1);

                _mm_storeu_si128((__m128i *)(dst + i), v_gray0);
                _mm_storeu_si128((__m128i *)(dst + i + 8), v_gray1);
            }
        }

        // Scalar tail — identical arithmetic to process().
        for( ; i < n; i++, src += scn)
            dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
    }

    int srccn, coeffs[3];
    __m128i v_cb, v_cg, v_cr;
    __m128i v_delta;
    bool haveSIMD;
};
1579
1580 #endif // CV_SSE4_1
1581
template <>
struct RGB2Gray<float>
{
    typedef float channel_type;

    // SSE2 specialization of the float RGB->gray conversion.
    // blueIdx == 0 (blue-first input) swaps the blue/red weights so
    // coeffs[k] always applies to source channel k.
    RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
        memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
        if(blueIdx == 0)
            std::swap(coeffs[0], coeffs[2]);

        v_cb = _mm_set1_ps(coeffs[0]);
        v_cg = _mm_set1_ps(coeffs[1]);
        v_cr = _mm_set1_ps(coeffs[2]);

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Weighted sum of three deinterleaved channel registers: the first
    // parameter is multiplied by coeffs[0], the second by coeffs[1], the
    // third by coeffs[2] — matching the scalar tail.
    void process(__m128 v_b, __m128 v_g, __m128 v_r,
                 __m128 & v_gray) const
    {
        v_gray = _mm_mul_ps(v_r, v_cr);
        v_gray = _mm_add_ps(v_gray, _mm_mul_ps(v_g, v_cg));
        v_gray = _mm_add_ps(v_gray, _mm_mul_ps(v_b, v_cb));
    }

    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, i = 0;
        float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];

        if (scn == 3 && haveSIMD)
        {
            // 8 pixels per iteration: 6 registers of interleaved data are
            // deinterleaved into channel planes. (The v_r*/v_g*/v_b* names
            // denote register order before deinterleaving, not color planes.)
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                __m128 v_r0 = _mm_loadu_ps(src);
                __m128 v_r1 = _mm_loadu_ps(src + 4);
                __m128 v_g0 = _mm_loadu_ps(src + 8);
                __m128 v_g1 = _mm_loadu_ps(src + 12);
                __m128 v_b0 = _mm_loadu_ps(src + 16);
                __m128 v_b1 = _mm_loadu_ps(src + 20);

                _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128 v_gray0;
                process(v_r0, v_g0, v_b0,
                        v_gray0);

                __m128 v_gray1;
                process(v_r1, v_g1, v_b1,
                        v_gray1);

                _mm_storeu_ps(dst + i, v_gray0);
                _mm_storeu_ps(dst + i + 4, v_gray1);
            }
        }
        else if (scn == 4 && haveSIMD)
        {
            // 4-channel variant: 8 registers in, alpha planes discarded.
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                __m128 v_r0 = _mm_loadu_ps(src);
                __m128 v_r1 = _mm_loadu_ps(src + 4);
                __m128 v_g0 = _mm_loadu_ps(src + 8);
                __m128 v_g1 = _mm_loadu_ps(src + 12);
                __m128 v_b0 = _mm_loadu_ps(src + 16);
                __m128 v_b1 = _mm_loadu_ps(src + 20);
                __m128 v_a0 = _mm_loadu_ps(src + 24);
                __m128 v_a1 = _mm_loadu_ps(src + 28);

                _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1);

                __m128 v_gray0;
                process(v_r0, v_g0, v_b0,
                        v_gray0);

                __m128 v_gray1;
                process(v_r1, v_g1, v_b1,
                        v_gray1);

                _mm_storeu_ps(dst + i, v_gray0);
                _mm_storeu_ps(dst + i + 4, v_gray1);
            }
        }

        // Scalar tail.
        for ( ; i < n; i++, src += scn)
            dst[i] = src[0]*cb + src[1]*cg + src[2]*cr;
    }

    int srccn;
    float coeffs[3];
    __m128 v_cb, v_cg, v_cr;
    bool haveSIMD;
};
1676
1677 #else
1678
1679 template<> struct RGB2Gray<ushort>
1680 {
1681 typedef ushort channel_type;
1682
RGB2Graycv::RGB2Gray1683 RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) : srccn(_srccn)
1684 {
1685 static const int coeffs0[] = { R2Y, G2Y, B2Y };
1686 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
1687 if( blueIdx == 0 )
1688 std::swap(coeffs[0], coeffs[2]);
1689 }
1690
operator ()cv::RGB2Gray1691 void operator()(const ushort* src, ushort* dst, int n) const
1692 {
1693 int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
1694 for(int i = 0; i < n; i++, src += scn)
1695 dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
1696 }
1697 int srccn;
1698 int coeffs[3];
1699 };
1700
1701 #endif
1702
1703 ///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
1704
1705 template<typename _Tp> struct RGB2YCrCb_f
1706 {
1707 typedef _Tp channel_type;
1708
RGB2YCrCb_fcv::RGB2YCrCb_f1709 RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : srccn(_srccn), blueIdx(_blueIdx)
1710 {
1711 static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
1712 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
1713 if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
1714 }
1715
operator ()cv::RGB2YCrCb_f1716 void operator()(const _Tp* src, _Tp* dst, int n) const
1717 {
1718 int scn = srccn, bidx = blueIdx;
1719 const _Tp delta = ColorChannel<_Tp>::half();
1720 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
1721 n *= 3;
1722 for(int i = 0; i < n; i += 3, src += scn)
1723 {
1724 _Tp Y = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
1725 _Tp Cr = saturate_cast<_Tp>((src[bidx^2] - Y)*C3 + delta);
1726 _Tp Cb = saturate_cast<_Tp>((src[bidx] - Y)*C4 + delta);
1727 dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
1728 }
1729 }
1730 int srccn, blueIdx;
1731 float coeffs[5];
1732 };
1733
1734 #if CV_NEON
1735
template <>
struct RGB2YCrCb_f<float>
{
    typedef float channel_type;

    // NEON specialization of the float RGB->YCrCb conversion.
    // blueIdx == 0 (blue-first input) swaps coeffs[0]/coeffs[2] so coeffs[k]
    // always applies to source channel k.
    RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) :
        srccn(_srccn), blueIdx(_blueIdx)
    {
        static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if(blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = vdupq_n_f32(coeffs[0]);
        v_c1 = vdupq_n_f32(coeffs[1]);
        v_c2 = vdupq_n_f32(coeffs[2]);
        v_c3 = vdupq_n_f32(coeffs[3]);
        v_c4 = vdupq_n_f32(coeffs[4]);
        v_delta = vdupq_n_f32(ColorChannel<float>::half());  // chroma offset
    }

    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        n *= 3;  // n counts output elements (3 per pixel) from here on

        if (scn == 3)
            // 4 pixels per iteration; vld3q/vst3q de-/re-interleave.
            for ( ; i <= n - 12; i += 12, src += 12)
            {
                float32x4x3_t v_src = vld3q_f32(src), v_dst;
                v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
                v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
                v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);

                vst3q_f32(dst + i, v_dst);
            }
        else
            // 4-channel input: alpha plane is dropped.
            for ( ; i <= n - 12; i += 12, src += 16)
            {
                float32x4x4_t v_src = vld4q_f32(src);
                float32x4x3_t v_dst;
                v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
                v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
                v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);

                vst3q_f32(dst + i, v_dst);
            }

        // Scalar tail — identical arithmetic to the vector body.
        for ( ; i < n; i += 3, src += scn)
        {
            float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
            float Cr = (src[bidx^2] - Y)*C3 + delta;
            float Cb = (src[bidx] - Y)*C4 + delta;
            dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
        }
    }
    int srccn, blueIdx;
    float coeffs[5];
    float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
};
1798
1799 #elif CV_SSE2
1800
template <>
struct RGB2YCrCb_f<float>
{
    typedef float channel_type;

    // SSE2 specialization of the float RGB->YCrCb conversion.
    // blueIdx == 0 (blue-first input) swaps coeffs[0]/coeffs[2] so coeffs[k]
    // always applies to source channel k.
    RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) :
        srccn(_srccn), blueIdx(_blueIdx)
    {
        static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = _mm_set1_ps(coeffs[0]);
        v_c1 = _mm_set1_ps(coeffs[1]);
        v_c2 = _mm_set1_ps(coeffs[2]);
        v_c3 = _mm_set1_ps(coeffs[3]);
        v_c4 = _mm_set1_ps(coeffs[4]);
        v_delta = _mm_set1_ps(ColorChannel<float>::half());  // chroma offset

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Parameters are deinterleaved channel planes 0/1/2 (v_r = channel 0,
    // v_g = channel 1, v_b = channel 2). The blueIdx selections reproduce the
    // scalar src[bidx^2] (for Cr) and src[bidx] (for Cb) indexing.
    void process(__m128 v_r, __m128 v_g, __m128 v_b,
                 __m128 & v_y, __m128 & v_cr, __m128 & v_cb) const
    {
        v_y = _mm_mul_ps(v_r, v_c0);
        v_y = _mm_add_ps(v_y, _mm_mul_ps(v_g, v_c1));
        v_y = _mm_add_ps(v_y, _mm_mul_ps(v_b, v_c2));

        v_cr = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(blueIdx == 0 ? v_b : v_r, v_y), v_c3), v_delta);
        v_cb = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(blueIdx == 2 ? v_b : v_r, v_y), v_c4), v_delta);
    }

    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        n *= 3;  // n counts output elements (3 per pixel) from here on

        if (haveSIMD)
        {
            // 8 pixels per iteration: deinterleave into channel planes, convert
            // two 4-pixel halves, re-interleave the Y/Cr/Cb planes for storing.
            for ( ; i <= n - 24; i += 24, src += 8 * scn)
            {
                __m128 v_r0 = _mm_loadu_ps(src);
                __m128 v_r1 = _mm_loadu_ps(src + 4);
                __m128 v_g0 = _mm_loadu_ps(src + 8);
                __m128 v_g1 = _mm_loadu_ps(src + 12);
                __m128 v_b0 = _mm_loadu_ps(src + 16);
                __m128 v_b1 = _mm_loadu_ps(src + 20);

                if (scn == 4)
                {
                    // Alpha planes are deinterleaved too, then discarded.
                    __m128 v_a0 = _mm_loadu_ps(src + 24);
                    __m128 v_a1 = _mm_loadu_ps(src + 28);
                    _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1,
                                        v_b0, v_b1, v_a0, v_a1);
                }
                else
                    _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128 v_y0, v_cr0, v_cb0;
                process(v_r0, v_g0, v_b0,
                        v_y0, v_cr0, v_cb0);

                __m128 v_y1, v_cr1, v_cb1;
                process(v_r1, v_g1, v_b1,
                        v_y1, v_cr1, v_cb1);

                _mm_interleave_ps(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);

                _mm_storeu_ps(dst + i, v_y0);
                _mm_storeu_ps(dst + i + 4, v_y1);
                _mm_storeu_ps(dst + i + 8, v_cr0);
                _mm_storeu_ps(dst + i + 12, v_cr1);
                _mm_storeu_ps(dst + i + 16, v_cb0);
                _mm_storeu_ps(dst + i + 20, v_cb1);
            }
        }

        // Scalar tail — identical arithmetic to process().
        for ( ; i < n; i += 3, src += scn)
        {
            float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
            float Cr = (src[bidx^2] - Y)*C3 + delta;
            float Cb = (src[bidx] - Y)*C4 + delta;
            dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
        }
    }
    int srccn, blueIdx;
    float coeffs[5];
    __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
    bool haveSIMD;
};
1895
1896 #endif
1897
1898 template<typename _Tp> struct RGB2YCrCb_i
1899 {
1900 typedef _Tp channel_type;
1901
RGB2YCrCb_icv::RGB2YCrCb_i1902 RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
1903 : srccn(_srccn), blueIdx(_blueIdx)
1904 {
1905 static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
1906 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
1907 if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
1908 }
operator ()cv::RGB2YCrCb_i1909 void operator()(const _Tp* src, _Tp* dst, int n) const
1910 {
1911 int scn = srccn, bidx = blueIdx;
1912 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
1913 int delta = ColorChannel<_Tp>::half()*(1 << yuv_shift);
1914 n *= 3;
1915 for(int i = 0; i < n; i += 3, src += scn)
1916 {
1917 int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
1918 int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
1919 int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
1920 dst[i] = saturate_cast<_Tp>(Y);
1921 dst[i+1] = saturate_cast<_Tp>(Cr);
1922 dst[i+2] = saturate_cast<_Tp>(Cb);
1923 }
1924 }
1925 int srccn, blueIdx;
1926 int coeffs[5];
1927 };
1928
1929 #if CV_NEON
1930
// NEON-accelerated specialization for 8-bit channels.
// Processes 8 pixels per iteration with vector intrinsics, then falls back
// to the scalar fixed-point formula for the remaining tail.
template <>
struct RGB2YCrCb_i<uchar>
{
    typedef uchar channel_type;

    RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
        : srccn(_srccn), blueIdx(_blueIdx)
    {
        // Default fixed-point coefficients (scaled by 2^yuv_shift).
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        // For BGR-ordered input the red and blue luma weights swap.
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        // Broadcast coefficients and constants into NEON registers once,
        // so operator() does not reload them per iteration.
        v_c0 = vdup_n_s16(coeffs[0]);
        v_c1 = vdup_n_s16(coeffs[1]);
        v_c2 = vdup_n_s16(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_c4 = vdupq_n_s32(coeffs[4]);
        // Chroma bias: half of the uchar range, pre-scaled by 2^yuv_shift.
        v_delta = vdupq_n_s32(ColorChannel<uchar>::half()*(1 << yuv_shift));
        // Rounding term for the descale shift (round-to-nearest).
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
    }

    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
        n *= 3;  // n now counts output elements (3 per pixel)

        // Vector loop: 8 pixels (24 output values) per iteration.
        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint8x8x3_t v_dst;
            int16x8x3_t v_src16;

            if (scn == 3)
            {
                // De-interleave 8 RGB pixels and widen u8 -> s16.
                uint8x8x3_t v_src = vld3_u8(src);
                v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
                v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
                v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
            }
            else
            {
                // 4-channel input: load RGBA; the alpha lane is discarded.
                uint8x8x4_t v_src = vld4_u8(src);
                v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
                v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
                v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
            }

            // Low half: first 4 pixels.
            int16x4x3_t v_src0;
            v_src0.val[0] = vget_low_s16(v_src16.val[0]);
            v_src0.val[1] = vget_low_s16(v_src16.val[1]);
            v_src0.val[2] = vget_low_s16(v_src16.val[2]);

            // Y = (c0*ch0 + c1*ch1 + c2*ch2 + round) >> yuv_shift
            int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta2), yuv_shift);
            // Cr = ((red - Y)*c3 + delta + round) >> yuv_shift; bidx^2 selects red.
            int32x4_t v_Cr0 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx^2]), v_Y0), v_c3);
            v_Cr0 = vshrq_n_s32(vaddq_s32(v_Cr0, v_delta2), yuv_shift);
            // Cb = ((blue - Y)*c4 + delta + round) >> yuv_shift; bidx selects blue.
            int32x4_t v_Cb0 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx]), v_Y0), v_c4);
            v_Cb0 = vshrq_n_s32(vaddq_s32(v_Cb0, v_delta2), yuv_shift);

            // High half: next 4 pixels, identical computation.
            v_src0.val[0] = vget_high_s16(v_src16.val[0]);
            v_src0.val[1] = vget_high_s16(v_src16.val[1]);
            v_src0.val[2] = vget_high_s16(v_src16.val[2]);

            int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta2), yuv_shift);
            int32x4_t v_Cr1 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx^2]), v_Y1), v_c3);
            v_Cr1 = vshrq_n_s32(vaddq_s32(v_Cr1, v_delta2), yuv_shift);
            int32x4_t v_Cb1 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx]), v_Y1), v_c4);
            v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);

            // Narrow s32 -> s16 (saturating) -> u8 (saturating, unsigned)
            // and store the three planes interleaved.
            v_dst.val[0] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
            v_dst.val[1] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cr0), vqmovn_s32(v_Cr1)));
            v_dst.val[2] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cb0), vqmovn_s32(v_Cb1)));

            vst3_u8(dst + i, v_dst);
        }

        // Scalar tail: same formula as the generic template.
        for ( ; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<uchar>(Y);
            dst[i+1] = saturate_cast<uchar>(Cr);
            dst[i+2] = saturate_cast<uchar>(Cb);
        }
    }
    int srccn, blueIdx, coeffs[5];
    int16x4_t v_c0, v_c1, v_c2;
    int32x4_t v_c3, v_c4, v_delta, v_delta2;
};
2024
// NEON-accelerated specialization for 16-bit channels.
// Uses an 8-pixel vector loop, then a 4-pixel vector loop, then a scalar
// tail for whatever remains.
template <>
struct RGB2YCrCb_i<ushort>
{
    typedef ushort channel_type;

    RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
        : srccn(_srccn), blueIdx(_blueIdx)
    {
        // Default fixed-point coefficients (scaled by 2^yuv_shift).
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        // For BGR-ordered input the red and blue luma weights swap.
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        // Broadcast constants once; 16-bit inputs are widened to 32 bits,
        // so all vector coefficients live in 32-bit lanes here.
        v_c0 = vdupq_n_s32(coeffs[0]);
        v_c1 = vdupq_n_s32(coeffs[1]);
        v_c2 = vdupq_n_s32(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_c4 = vdupq_n_s32(coeffs[4]);
        // Chroma bias: half of the ushort range, pre-scaled by 2^yuv_shift.
        v_delta = vdupq_n_s32(ColorChannel<ushort>::half()*(1 << yuv_shift));
        // Rounding term for the descale shift.
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
    }

    void operator()(const ushort * src, ushort * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
        n *= 3;  // n now counts output elements (3 per pixel)

        // Vector loop A: 8 pixels (24 output values) per iteration.
        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint16x8x3_t v_src, v_dst;
            int32x4x3_t v_src0;

            if (scn == 3)
                v_src = vld3q_u16(src);
            else
            {
                // 4-channel input: de-interleave RGBA and drop alpha.
                uint16x8x4_t v_src_ = vld4q_u16(src);
                v_src.val[0] = v_src_.val[0];
                v_src.val[1] = v_src_.val[1];
                v_src.val[2] = v_src_.val[2];
            }

            // Low half: widen u16 -> s32 for the first 4 pixels.
            v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0])));
            v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1])));
            v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));

            // Y = (c0*ch0 + c1*ch1 + c2*ch2 + round) >> yuv_shift
            int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta2), yuv_shift);
            // Cr from the red channel (bidx^2), Cb from the blue one (bidx).
            int32x4_t v_Cr0 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y0), v_c3);
            v_Cr0 = vshrq_n_s32(vaddq_s32(v_Cr0, v_delta2), yuv_shift);
            int32x4_t v_Cb0 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y0), v_c4);
            v_Cb0 = vshrq_n_s32(vaddq_s32(v_Cb0, v_delta2), yuv_shift);

            // High half: next 4 pixels, identical computation.
            v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0])));
            v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1])));
            v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));

            int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta2), yuv_shift);
            int32x4_t v_Cr1 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y1), v_c3);
            v_Cr1 = vshrq_n_s32(vaddq_s32(v_Cr1, v_delta2), yuv_shift);
            int32x4_t v_Cb1 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y1), v_c4);
            v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);

            // Saturating narrow s32 -> u16 and store interleaved.
            v_dst.val[0] = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1));
            v_dst.val[1] = vcombine_u16(vqmovun_s32(v_Cr0), vqmovun_s32(v_Cr1));
            v_dst.val[2] = vcombine_u16(vqmovun_s32(v_Cb0), vqmovun_s32(v_Cb1));

            vst3q_u16(dst + i, v_dst);
        }

        // Vector loop B: 4 pixels (12 output values) per iteration.
        for ( ; i <= n - 12; i += 12, src += scn * 4)
        {
            uint16x4x3_t v_dst;
            int32x4x3_t v_src0;

            if (scn == 3)
            {
                uint16x4x3_t v_src = vld3_u16(src);
                v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0]));
                v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1]));
                v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
            }
            else
            {
                // 4-channel input: alpha lane is ignored.
                uint16x4x4_t v_src = vld4_u16(src);
                v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0]));
                v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1]));
                v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
            }

            // Same Y/Cr/Cb math as above, on a single 4-lane register.
            int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y = vshrq_n_s32(vaddq_s32(v_Y, v_delta2), yuv_shift);
            int32x4_t v_Cr = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y), v_c3);
            v_Cr = vshrq_n_s32(vaddq_s32(v_Cr, v_delta2), yuv_shift);
            int32x4_t v_Cb = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y), v_c4);
            v_Cb = vshrq_n_s32(vaddq_s32(v_Cb, v_delta2), yuv_shift);

            v_dst.val[0] = vqmovun_s32(v_Y);
            v_dst.val[1] = vqmovun_s32(v_Cr);
            v_dst.val[2] = vqmovun_s32(v_Cb);

            vst3_u16(dst + i, v_dst);
        }

        // Scalar tail: same formula as the generic template.
        for ( ; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<ushort>(Y);
            dst[i+1] = saturate_cast<ushort>(Cr);
            dst[i+2] = saturate_cast<ushort>(Cb);
        }
    }
    int srccn, blueIdx, coeffs[5];
    int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta, v_delta2;
};
2145
2146 #elif CV_SSE4_1
2147
// SSE4.1-accelerated specialization for 8-bit channels.
// Processes 32 pixels (96 output bytes) per iteration when SSE4.1 is
// available at runtime; otherwise only the scalar loop runs.
template <>
struct RGB2YCrCb_i<uchar>
{
    typedef uchar channel_type;

    RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
        : srccn(_srccn), blueIdx(_blueIdx)
    {
        // Default fixed-point coefficients (scaled by 2^yuv_shift).
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        // For BGR-ordered input the red and blue luma weights swap.
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = _mm_set1_epi32(coeffs[0]);
        v_c1 = _mm_set1_epi32(coeffs[1]);
        v_c2 = _mm_set1_epi32(coeffs[2]);
        v_c3 = _mm_set1_epi32(coeffs[3]);
        v_c4 = _mm_set1_epi32(coeffs[4]);
        // v_delta2: rounding term; v_delta: chroma bias with the rounding
        // term folded in, so the chroma path adds a single constant.
        v_delta2 = _mm_set1_epi32(1 << (yuv_shift - 1));
        v_delta = _mm_set1_epi32(ColorChannel<uchar>::half()*(1 << yuv_shift));
        v_delta = _mm_add_epi32(v_delta, v_delta2);
        v_zero = _mm_setzero_si128();

        // Runtime dispatch: vector loop is used only if SSE4.1 is present.
        haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1);
    }

    // 16u x 8
    // Converts 8 pixels held as 16-bit lanes in v_r/v_g/v_b into packed
    // 16-bit Y/Cr/Cb lanes.
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 __m128i & v_y, __m128i & v_cr, __m128i & v_cb) const
    {
        // Widen the low four 16-bit lanes to 32 bits.
        __m128i v_r_p = _mm_unpacklo_epi16(v_r, v_zero);
        __m128i v_g_p = _mm_unpacklo_epi16(v_g, v_zero);
        __m128i v_b_p = _mm_unpacklo_epi16(v_b, v_zero);

        // Y = (c0*r + c1*g + c2*b + round) >> yuv_shift (logical shift:
        // the weighted sum is non-negative).
        __m128i v_y0 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0),
                                     _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1),
                                                   _mm_mullo_epi32(v_b_p, v_c2)));
        v_y0 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y0), yuv_shift);

        // Chroma uses an arithmetic shift because (channel - Y) can be
        // negative. v_delta already contains bias + rounding term.
        __m128i v_cr0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y0), v_c3);
        __m128i v_cb0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y0), v_c4);
        v_cr0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr0), yuv_shift);
        v_cb0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb0), yuv_shift);

        // High four lanes: identical computation.
        v_r_p = _mm_unpackhi_epi16(v_r, v_zero);
        v_g_p = _mm_unpackhi_epi16(v_g, v_zero);
        v_b_p = _mm_unpackhi_epi16(v_b, v_zero);

        __m128i v_y1 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0),
                                     _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1),
                                                   _mm_mullo_epi32(v_b_p, v_c2)));
        v_y1 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y1), yuv_shift);

        __m128i v_cr1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y1), v_c3);
        __m128i v_cb1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y1), v_c4);
        v_cr1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr1), yuv_shift);
        v_cb1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb1), yuv_shift);

        // Pack back to signed 16-bit lanes (saturating).
        v_y = _mm_packs_epi32(v_y0, v_y1);
        v_cr = _mm_packs_epi32(v_cr0, v_cr1);
        v_cb = _mm_packs_epi32(v_cb0, v_cb1);
    }

    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
        n *= 3;  // n now counts output elements (3 per pixel)

        if (haveSIMD)
        {
            // 32 pixels -> 96 output bytes per iteration.
            for ( ; i <= n - 96; i += 96, src += scn * 32)
            {
                // Load 96 (or 128 for RGBA) interleaved bytes...
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 48));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 64));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 80));

                // ...then de-interleave into per-channel registers.
                if (scn == 4)
                {
                    __m128i v_a0 = _mm_loadu_si128((__m128i const *)(src + 96));
                    __m128i v_a1 = _mm_loadu_si128((__m128i const *)(src + 112));
                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1,
                                          v_b0, v_b1, v_a0, v_a1);
                }
                else
                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                // Widen each 8-bit half to 16 bits and convert 8 pixels
                // at a time via process().
                __m128i v_y0 = v_zero, v_cr0 = v_zero, v_cb0 = v_zero;
                process(_mm_unpacklo_epi8(v_r0, v_zero),
                        _mm_unpacklo_epi8(v_g0, v_zero),
                        _mm_unpacklo_epi8(v_b0, v_zero),
                        v_y0, v_cr0, v_cb0);

                __m128i v_y1 = v_zero, v_cr1 = v_zero, v_cb1 = v_zero;
                process(_mm_unpackhi_epi8(v_r0, v_zero),
                        _mm_unpackhi_epi8(v_g0, v_zero),
                        _mm_unpackhi_epi8(v_b0, v_zero),
                        v_y1, v_cr1, v_cb1);

                // Pack 16-bit results to unsigned 8-bit (saturating).
                __m128i v_y_0 = _mm_packus_epi16(v_y0, v_y1);
                __m128i v_cr_0 = _mm_packus_epi16(v_cr0, v_cr1);
                __m128i v_cb_0 = _mm_packus_epi16(v_cb0, v_cb1);

                process(_mm_unpacklo_epi8(v_r1, v_zero),
                        _mm_unpacklo_epi8(v_g1, v_zero),
                        _mm_unpacklo_epi8(v_b1, v_zero),
                        v_y0, v_cr0, v_cb0);

                process(_mm_unpackhi_epi8(v_r1, v_zero),
                        _mm_unpackhi_epi8(v_g1, v_zero),
                        _mm_unpackhi_epi8(v_b1, v_zero),
                        v_y1, v_cr1, v_cb1);

                __m128i v_y_1 = _mm_packus_epi16(v_y0, v_y1);
                __m128i v_cr_1 = _mm_packus_epi16(v_cr0, v_cr1);
                __m128i v_cb_1 = _mm_packus_epi16(v_cb0, v_cb1);

                // Re-interleave planes into Y/Cr/Cb pixel order and store.
                _mm_interleave_epi8(v_y_0, v_y_1, v_cr_0, v_cr_1, v_cb_0, v_cb_1);

                _mm_storeu_si128((__m128i *)(dst + i), v_y_0);
                _mm_storeu_si128((__m128i *)(dst + i + 16), v_y_1);
                _mm_storeu_si128((__m128i *)(dst + i + 32), v_cr_0);
                _mm_storeu_si128((__m128i *)(dst + i + 48), v_cr_1);
                _mm_storeu_si128((__m128i *)(dst + i + 64), v_cb_0);
                _mm_storeu_si128((__m128i *)(dst + i + 80), v_cb_1);
            }
        }

        // Scalar tail (and full fallback when SSE4.1 is unavailable).
        for ( ; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<uchar>(Y);
            dst[i+1] = saturate_cast<uchar>(Cr);
            dst[i+2] = saturate_cast<uchar>(Cb);
        }
    }

    int srccn, blueIdx, coeffs[5];
    __m128i v_c0, v_c1, v_c2;
    __m128i v_c3, v_c4, v_delta, v_delta2;
    __m128i v_zero;
    bool haveSIMD;
};
2297
// SSE4.1-accelerated specialization for 16-bit channels.
// Processes 16 pixels (48 output values) per iteration when SSE4.1 is
// available at runtime; otherwise only the scalar loop runs.
template <>
struct RGB2YCrCb_i<ushort>
{
    typedef ushort channel_type;

    RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
        : dstcn_unused_tag_removed srccn(_srccn), blueIdx(_blueIdx)
    {
        // Default fixed-point coefficients (scaled by 2^yuv_shift).
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        // For BGR-ordered input the red and blue luma weights swap.
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = _mm_set1_epi32(coeffs[0]);
        v_c1 = _mm_set1_epi32(coeffs[1]);
        v_c2 = _mm_set1_epi32(coeffs[2]);
        v_c3 = _mm_set1_epi32(coeffs[3]);
        v_c4 = _mm_set1_epi32(coeffs[4]);
        // v_delta2: rounding term; v_delta: chroma bias with the rounding
        // term folded in.
        v_delta2 = _mm_set1_epi32(1 << (yuv_shift - 1));
        v_delta = _mm_set1_epi32(ColorChannel<ushort>::half()*(1 << yuv_shift));
        v_delta = _mm_add_epi32(v_delta, v_delta2);
        v_zero = _mm_setzero_si128();

        // Runtime dispatch: vector loop is used only if SSE4.1 is present.
        haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1);
    }

    // 16u x 8
    // Converts 8 pixels held as 16-bit lanes in v_r/v_g/v_b into packed
    // unsigned 16-bit Y/Cr/Cb lanes.
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 __m128i & v_y, __m128i & v_cr, __m128i & v_cb) const
    {
        // Widen the low four 16-bit lanes to 32 bits.
        __m128i v_r_p = _mm_unpacklo_epi16(v_r, v_zero);
        __m128i v_g_p = _mm_unpacklo_epi16(v_g, v_zero);
        __m128i v_b_p = _mm_unpacklo_epi16(v_b, v_zero);

        // Y = (c0*r + c1*g + c2*b + round) >> yuv_shift (logical shift:
        // the weighted sum is non-negative).
        __m128i v_y0 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0),
                                     _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1),
                                                   _mm_mullo_epi32(v_b_p, v_c2)));
        v_y0 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y0), yuv_shift);

        // Chroma uses an arithmetic shift: (channel - Y) can be negative.
        __m128i v_cr0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y0), v_c3);
        __m128i v_cb0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y0), v_c4);
        v_cr0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr0), yuv_shift);
        v_cb0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb0), yuv_shift);

        // High four lanes: identical computation.
        v_r_p = _mm_unpackhi_epi16(v_r, v_zero);
        v_g_p = _mm_unpackhi_epi16(v_g, v_zero);
        v_b_p = _mm_unpackhi_epi16(v_b, v_zero);

        __m128i v_y1 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0),
                                     _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1),
                                                   _mm_mullo_epi32(v_b_p, v_c2)));
        v_y1 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y1), yuv_shift);

        __m128i v_cr1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y1), v_c3);
        __m128i v_cb1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y1), v_c4);
        v_cr1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr1), yuv_shift);
        v_cb1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb1), yuv_shift);

        // Pack to unsigned 16-bit with saturation (SSE4.1 packus_epi32).
        v_y = _mm_packus_epi32(v_y0, v_y1);
        v_cr = _mm_packus_epi32(v_cr0, v_cr1);
        v_cb = _mm_packus_epi32(v_cb0, v_cb1);
    }

    void operator()(const ushort * src, ushort * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
        n *= 3;  // n now counts output elements (3 per pixel)

        if (haveSIMD)
        {
            // 16 pixels -> 48 output values per iteration.
            for ( ; i <= n - 48; i += 48, src += scn * 16)
            {
                // Load 48 (or 64 for RGBA) interleaved ushorts...
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 8));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 24));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 40));

                // ...then de-interleave into per-channel registers.
                if (scn == 4)
                {
                    __m128i v_a0 = _mm_loadu_si128((__m128i const *)(src + 48));
                    __m128i v_a1 = _mm_loadu_si128((__m128i const *)(src + 56));

                    _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1,
                                           v_b0, v_b1, v_a0, v_a1);
                }
                else
                    _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                // Two 8-pixel conversions.
                __m128i v_y0 = v_zero, v_cr0 = v_zero, v_cb0 = v_zero;
                process(v_r0, v_g0, v_b0,
                        v_y0, v_cr0, v_cb0);

                __m128i v_y1 = v_zero, v_cr1 = v_zero, v_cb1 = v_zero;
                process(v_r1, v_g1, v_b1,
                        v_y1, v_cr1, v_cb1);

                // Re-interleave planes into Y/Cr/Cb pixel order and store.
                _mm_interleave_epi16(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);

                _mm_storeu_si128((__m128i *)(dst + i), v_y0);
                _mm_storeu_si128((__m128i *)(dst + i + 8), v_y1);
                _mm_storeu_si128((__m128i *)(dst + i + 16), v_cr0);
                _mm_storeu_si128((__m128i *)(dst + i + 24), v_cr1);
                _mm_storeu_si128((__m128i *)(dst + i + 32), v_cb0);
                _mm_storeu_si128((__m128i *)(dst + i + 40), v_cb1);
            }
        }

        // Scalar tail (and full fallback when SSE4.1 is unavailable).
        for ( ; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<ushort>(Y);
            dst[i+1] = saturate_cast<ushort>(Cr);
            dst[i+2] = saturate_cast<ushort>(Cb);
        }
    }

    int srccn, blueIdx, coeffs[5];
    __m128i v_c0, v_c1, v_c2;
    __m128i v_c3, v_c4, v_delta, v_delta2;
    __m128i v_zero;
    bool haveSIMD;
};
2426
2427 #endif // CV_SSE4_1
2428
2429 template<typename _Tp> struct YCrCb2RGB_f
2430 {
2431 typedef _Tp channel_type;
2432
YCrCb2RGB_fcv::YCrCb2RGB_f2433 YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
2434 : dstcn(_dstcn), blueIdx(_blueIdx)
2435 {
2436 static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
2437 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
2438 }
operator ()cv::YCrCb2RGB_f2439 void operator()(const _Tp* src, _Tp* dst, int n) const
2440 {
2441 int dcn = dstcn, bidx = blueIdx;
2442 const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
2443 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
2444 n *= 3;
2445 for(int i = 0; i < n; i += 3, dst += dcn)
2446 {
2447 _Tp Y = src[i];
2448 _Tp Cr = src[i+1];
2449 _Tp Cb = src[i+2];
2450
2451 _Tp b = saturate_cast<_Tp>(Y + (Cb - delta)*C3);
2452 _Tp g = saturate_cast<_Tp>(Y + (Cb - delta)*C2 + (Cr - delta)*C1);
2453 _Tp r = saturate_cast<_Tp>(Y + (Cr - delta)*C0);
2454
2455 dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
2456 if( dcn == 4 )
2457 dst[3] = alpha;
2458 }
2459 }
2460 int dstcn, blueIdx;
2461 float coeffs[4];
2462 };
2463
2464 #if CV_NEON
2465
// NEON-accelerated specialization for float channels.
// Processes 4 pixels per iteration, with separate loops for 3- and
// 4-channel output, followed by a scalar tail.
template <>
struct YCrCb2RGB_f<float>
{
    typedef float channel_type;

    YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // Default inverse-transform coefficients.
        static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));

        // Broadcast the coefficients and constants once.
        v_c0 = vdupq_n_f32(coeffs[0]);
        v_c1 = vdupq_n_f32(coeffs[1]);
        v_c2 = vdupq_n_f32(coeffs[2]);
        v_c3 = vdupq_n_f32(coeffs[3]);
        v_delta = vdupq_n_f32(ColorChannel<float>::half());
        v_alpha = vdupq_n_f32(ColorChannel<float>::max());
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;  // n now counts input elements (3 per pixel)

        if (dcn == 3)
            // 3-channel output: 4 pixels (12 floats in, 12 floats out).
            for ( ; i <= n - 12; i += 12, dst += 12)
            {
                float32x4x3_t v_src = vld3q_f32(src + i), v_dst;
                float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];

                // b = Y + (Cb - delta)*C3; g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
                // r = Y + (Cr - delta)*C0. bidx routes b/r to the right slot.
                v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
                v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
                v_dst.val[bidx^2] = vmlaq_f32(v_Y, vsubq_f32(v_Cr, v_delta), v_c0);

                vst3q_f32(dst, v_dst);
            }
        else
            // 4-channel output: 4 pixels (12 floats in, 16 floats out),
            // alpha filled with the channel maximum.
            for ( ; i <= n - 12; i += 12, dst += 16)
            {
                float32x4x3_t v_src = vld3q_f32(src + i);
                float32x4x4_t v_dst;
                float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];

                v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
                v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
                v_dst.val[bidx^2] = vmlaq_f32(v_Y, vsubq_f32(v_Cr, v_delta), v_c0);
                v_dst.val[3] = v_alpha;

                vst4q_f32(dst, v_dst);
            }

        // Scalar tail: same formula as the generic template.
        for ( ; i < n; i += 3, dst += dcn)
        {
            float Y = src[i], Cr = src[i+1], Cb = src[i+2];

            float b = Y + (Cb - delta)*C3;
            float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
            float r = Y + (Cr - delta)*C0;

            dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    float coeffs[4];
    float32x4_t v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
};
2536
2537 #elif CV_SSE2
2538
// SSE2-accelerated specialization for float channels.
// Processes 8 pixels per iteration when SSE2 is available at runtime,
// followed by a scalar tail.
template <>
struct YCrCb2RGB_f<float>
{
    typedef float channel_type;

    YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // Default inverse-transform coefficients.
        static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));

        // Broadcast the coefficients and constants once.
        v_c0 = _mm_set1_ps(coeffs[0]);
        v_c1 = _mm_set1_ps(coeffs[1]);
        v_c2 = _mm_set1_ps(coeffs[2]);
        v_c3 = _mm_set1_ps(coeffs[3]);
        v_delta = _mm_set1_ps(ColorChannel<float>::half());
        v_alpha = _mm_set1_ps(ColorChannel<float>::max());

        // Runtime dispatch: vector loop is used only if SSE2 is present.
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Converts 4 pixels held in v_y/v_cr/v_cb into r/g/b registers.
    // Output naming follows RGB order; for blueIdx == 0 the b and r
    // registers are swapped at the end so the caller can store in order.
    void process(__m128 v_y, __m128 v_cr, __m128 v_cb,
                 __m128 & v_r, __m128 & v_g, __m128 & v_b) const
    {
        // Center the chroma channels around zero.
        v_cb = _mm_sub_ps(v_cb, v_delta);
        v_cr = _mm_sub_ps(v_cr, v_delta);

        // b = Cb*C3; g = Cb*C2 + Cr*C1; r = Cr*C0 ...
        v_b = _mm_mul_ps(v_cb, v_c3);
        v_g = _mm_add_ps(_mm_mul_ps(v_cb, v_c2), _mm_mul_ps(v_cr, v_c1));
        v_r = _mm_mul_ps(v_cr, v_c0);

        // ... then add luma to all three.
        v_b = _mm_add_ps(v_b, v_y);
        v_g = _mm_add_ps(v_g, v_y);
        v_r = _mm_add_ps(v_r, v_y);

        if (blueIdx == 0)
            std::swap(v_b, v_r);
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;  // n now counts input elements (3 per pixel)

        if (haveSIMD)
        {
            // 8 pixels (24 input floats) per iteration.
            for ( ; i <= n - 24; i += 24, dst += 8 * dcn)
            {
                // Load 24 interleaved floats and de-interleave into
                // Y/Cr/Cb registers (4 pixels each).
                __m128 v_y0 = _mm_loadu_ps(src + i);
                __m128 v_y1 = _mm_loadu_ps(src + i + 4);
                __m128 v_cr0 = _mm_loadu_ps(src + i + 8);
                __m128 v_cr1 = _mm_loadu_ps(src + i + 12);
                __m128 v_cb0 = _mm_loadu_ps(src + i + 16);
                __m128 v_cb1 = _mm_loadu_ps(src + i + 20);

                _mm_deinterleave_ps(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);

                __m128 v_r0, v_g0, v_b0;
                process(v_y0, v_cr0, v_cb0,
                        v_r0, v_g0, v_b0);

                __m128 v_r1, v_g1, v_b1;
                process(v_y1, v_cr1, v_cb1,
                        v_r1, v_g1, v_b1);

                __m128 v_a0 = v_alpha, v_a1 = v_alpha;

                // Re-interleave the planes into pixel order (3 or 4
                // channels) before storing.
                if (dcn == 3)
                    _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);
                else
                    _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1,
                                      v_b0, v_b1, v_a0, v_a1);

                _mm_storeu_ps(dst, v_r0);
                _mm_storeu_ps(dst + 4, v_r1);
                _mm_storeu_ps(dst + 8, v_g0);
                _mm_storeu_ps(dst + 12, v_g1);
                _mm_storeu_ps(dst + 16, v_b0);
                _mm_storeu_ps(dst + 20, v_b1);

                if (dcn == 4)
                {
                    _mm_storeu_ps(dst + 24, v_a0);
                    _mm_storeu_ps(dst + 28, v_a1);
                }
            }
        }

        // Scalar tail (and full fallback when SSE2 is unavailable).
        for ( ; i < n; i += 3, dst += dcn)
        {
            float Y = src[i], Cr = src[i+1], Cb = src[i+2];

            float b = Y + (Cb - delta)*C3;
            float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
            float r = Y + (Cr - delta)*C0;

            dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    float coeffs[4];

    __m128 v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
    bool haveSIMD;
};
2648
2649 #endif
2650
2651 template<typename _Tp> struct YCrCb2RGB_i
2652 {
2653 typedef _Tp channel_type;
2654
YCrCb2RGB_icv::YCrCb2RGB_i2655 YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
2656 : dstcn(_dstcn), blueIdx(_blueIdx)
2657 {
2658 static const int coeffs0[] = {22987, -11698, -5636, 29049};
2659 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
2660 }
2661
operator ()cv::YCrCb2RGB_i2662 void operator()(const _Tp* src, _Tp* dst, int n) const
2663 {
2664 int dcn = dstcn, bidx = blueIdx;
2665 const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
2666 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
2667 n *= 3;
2668 for(int i = 0; i < n; i += 3, dst += dcn)
2669 {
2670 _Tp Y = src[i];
2671 _Tp Cr = src[i+1];
2672 _Tp Cb = src[i+2];
2673
2674 int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
2675 int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
2676 int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);
2677
2678 dst[bidx] = saturate_cast<_Tp>(b);
2679 dst[1] = saturate_cast<_Tp>(g);
2680 dst[bidx^2] = saturate_cast<_Tp>(r);
2681 if( dcn == 4 )
2682 dst[3] = alpha;
2683 }
2684 }
2685 int dstcn, blueIdx;
2686 int coeffs[4];
2687 };
2688
2689 #if CV_NEON
2690
// NEON-accelerated specialization for 8-bit channels.
// Processes 8 pixels per iteration, then falls back to the scalar
// fixed-point formula for the remaining tail.
template <>
struct YCrCb2RGB_i<uchar>
{
    typedef uchar channel_type;

    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // Default fixed-point inverse-transform coefficients.
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));

        // Broadcast coefficients and constants once.
        v_c0 = vdupq_n_s32(coeffs[0]);
        v_c1 = vdupq_n_s32(coeffs[1]);
        v_c2 = vdupq_n_s32(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        // Chroma bias (half of the uchar range) kept as 16-bit lanes so
        // vsubl_s16 can subtract-and-widen in one step.
        v_delta = vdup_n_s16(ColorChannel<uchar>::half());
        // Rounding term for the descale shift.
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;  // n now counts input elements (3 per pixel)

        // Vector loop: 8 pixels (24 input bytes) per iteration.
        for ( ; i <= n - 24; i += 24, dst += dcn * 8)
        {
            // De-interleave 8 YCrCb pixels and widen u8 -> s16.
            uint8x8x3_t v_src = vld3_u8(src + i);
            int16x8x3_t v_src16;
            v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
            v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
            v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));

            // Low half: first 4 pixels.
            int16x4_t v_Y = vget_low_s16(v_src16.val[0]),
                      v_Cr = vget_low_s16(v_src16.val[1]),
                      v_Cb = vget_low_s16(v_src16.val[2]);

            // b = Y + ((Cb - delta)*C3 + round) >> yuv_shift, etc.
            // vsubl_s16 subtracts the bias and widens s16 -> s32.
            int32x4_t v_b0 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
            v_b0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g0 = vmlaq_s32(vmulq_s32(vsubl_s16(v_Cr, v_delta), v_c1), vsubl_s16(v_Cb, v_delta), v_c2);
            v_g0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_g0, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r0 = vmulq_s32(v_c0, vsubl_s16(v_Cr, v_delta));
            v_r0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);

            // High half: next 4 pixels, identical computation.
            v_Y = vget_high_s16(v_src16.val[0]);
            v_Cr = vget_high_s16(v_src16.val[1]);
            v_Cb = vget_high_s16(v_src16.val[2]);

            int32x4_t v_b1 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
            v_b1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g1 = vmlaq_s32(vmulq_s32(vsubl_s16(v_Cr, v_delta), v_c1), vsubl_s16(v_Cb, v_delta), v_c2);
            v_g1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_g1, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r1 = vmulq_s32(v_c0, vsubl_s16(v_Cr, v_delta));
            v_r1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r1, v_delta2), yuv_shift), v_Y);

            // Narrow s32 -> s16 -> u8 with final unsigned saturation.
            uint8x8_t v_b = vqmovun_s16(vcombine_s16(vmovn_s32(v_b0), vmovn_s32(v_b1)));
            uint8x8_t v_g = vqmovun_s16(vcombine_s16(vmovn_s32(v_g0), vmovn_s32(v_g1)));
            uint8x8_t v_r = vqmovun_s16(vcombine_s16(vmovn_s32(v_r0), vmovn_s32(v_r1)));

            if (dcn == 3)
            {
                // bidx routes blue/red to the right interleaved slot.
                uint8x8x3_t v_dst;
                v_dst.val[bidx] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[bidx^2] = v_r;
                vst3_u8(dst, v_dst);
            }
            else
            {
                // 4-channel output: alpha filled with the channel maximum.
                uint8x8x4_t v_dst;
                v_dst.val[bidx] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[bidx^2] = v_r;
                v_dst.val[3] = v_alpha;
                vst4_u8(dst, v_dst);
            }
        }

        // Scalar tail: same formula as the generic template.
        for ( ; i < n; i += 3, dst += dcn)
        {
            uchar Y = src[i];
            uchar Cr = src[i+1];
            uchar Cb = src[i+2];

            int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
            int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
            int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);

            dst[bidx] = saturate_cast<uchar>(b);
            dst[1] = saturate_cast<uchar>(g);
            dst[bidx^2] = saturate_cast<uchar>(r);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[4];

    int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2;
    int16x4_t v_delta;
    uint8x8_t v_alpha;
};
2795
2796 template <>
2797 struct YCrCb2RGB_i<ushort>
2798 {
2799 typedef ushort channel_type;
2800
    // Build the fixed-point YCrCb -> RGB coefficient table and broadcast
    // the constants into NEON registers used by the vectorized operator().
    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // Default fixed-point inverse-transform coefficients.
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));

        v_c0 = vdupq_n_s32(coeffs[0]);
        v_c1 = vdupq_n_s32(coeffs[1]);
        v_c2 = vdupq_n_s32(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        // Chroma bias: half of the ushort channel range.
        v_delta = vdupq_n_s32(ColorChannel<ushort>::half());
        // Rounding term for the descale shift.
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
        v_alpha = vdupq_n_u16(ColorChannel<ushort>::max());
        // 64-bit (4-lane) copy of the alpha vector for narrower stores.
        v_alpha2 = vget_low_u16(v_alpha);
    }
2816
operator ()cv::YCrCb2RGB_i2817 void operator()(const ushort* src, ushort* dst, int n) const
2818 {
2819 int dcn = dstcn, bidx = blueIdx, i = 0;
2820 const ushort delta = ColorChannel<ushort>::half(), alpha = ColorChannel<ushort>::max();
2821 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
2822 n *= 3;
2823
2824 for ( ; i <= n - 24; i += 24, dst += dcn * 8)
2825 {
2826 uint16x8x3_t v_src = vld3q_u16(src + i);
2827
2828 int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))),
2829 v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))),
2830 v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));
2831
2832 int32x4_t v_b0 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
2833 v_b0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
2834 int32x4_t v_g0 = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
2835 v_g0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g0, v_delta2), yuv_shift), v_Y);
2836 int32x4_t v_r0 = vmulq_s32(v_c0, vsubq_s32(v_Cr, v_delta));
2837 v_r0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);
2838
2839 v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0]))),
2840 v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1]))),
2841 v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));
2842
2843 int32x4_t v_b1 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
2844 v_b1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
2845 int32x4_t v_g1 = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
2846 v_g1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g1, v_delta2), yuv_shift), v_Y);
2847 int32x4_t v_r1 = vmulq_s32(v_c0, vsubq_s32(v_Cr, v_delta));
2848 v_r1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r1, v_delta2), yuv_shift), v_Y);
2849
2850 uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_b0), vqmovun_s32(v_b1));
2851 uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_g0), vqmovun_s32(v_g1));
2852 uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_r0), vqmovun_s32(v_r1));
2853
2854 if (dcn == 3)
2855 {
2856 uint16x8x3_t v_dst;
2857 v_dst.val[bidx] = v_b;
2858 v_dst.val[1] = v_g;
2859 v_dst.val[bidx^2] = v_r;
2860 vst3q_u16(dst, v_dst);
2861 }
2862 else
2863 {
2864 uint16x8x4_t v_dst;
2865 v_dst.val[bidx] = v_b;
2866 v_dst.val[1] = v_g;
2867 v_dst.val[bidx^2] = v_r;
2868 v_dst.val[3] = v_alpha;
2869 vst4q_u16(dst, v_dst);
2870 }
2871 }
2872
2873 for ( ; i <= n - 12; i += 12, dst += dcn * 4)
2874 {
2875 uint16x4x3_t v_src = vld3_u16(src + i);
2876
2877 int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])),
2878 v_Cr = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])),
2879 v_Cb = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
2880
2881 int32x4_t v_b = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
2882 v_b = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b, v_delta2), yuv_shift), v_Y);
2883 int32x4_t v_g = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
2884 v_g = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g, v_delta2), yuv_shift), v_Y);
2885 int32x4_t v_r = vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c0);
2886 v_r = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r, v_delta2), yuv_shift), v_Y);
2887
2888 uint16x4_t v_bd = vqmovun_s32(v_b);
2889 uint16x4_t v_gd = vqmovun_s32(v_g);
2890 uint16x4_t v_rd = vqmovun_s32(v_r);
2891
2892 if (dcn == 3)
2893 {
2894 uint16x4x3_t v_dst;
2895 v_dst.val[bidx] = v_bd;
2896 v_dst.val[1] = v_gd;
2897 v_dst.val[bidx^2] = v_rd;
2898 vst3_u16(dst, v_dst);
2899 }
2900 else
2901 {
2902 uint16x4x4_t v_dst;
2903 v_dst.val[bidx] = v_bd;
2904 v_dst.val[1] = v_gd;
2905 v_dst.val[bidx^2] = v_rd;
2906 v_dst.val[3] = v_alpha2;
2907 vst4_u16(dst, v_dst);
2908 }
2909 }
2910
2911 for ( ; i < n; i += 3, dst += dcn)
2912 {
2913 ushort Y = src[i];
2914 ushort Cr = src[i+1];
2915 ushort Cb = src[i+2];
2916
2917 int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
2918 int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
2919 int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);
2920
2921 dst[bidx] = saturate_cast<ushort>(b);
2922 dst[1] = saturate_cast<ushort>(g);
2923 dst[bidx^2] = saturate_cast<ushort>(r);
2924 if( dcn == 4 )
2925 dst[3] = alpha;
2926 }
2927 }
2928 int dstcn, blueIdx;
2929 int coeffs[4];
2930
2931 int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2, v_delta;
2932 uint16x8_t v_alpha;
2933 uint16x4_t v_alpha2;
2934 };
2935
2936 #elif CV_SSE2
2937
// SSE2-accelerated YCrCb -> RGB/RGBA conversion for 8-bit (uchar) channels.
// Uses the same fixed-point formula as the scalar path:
//   B = Y + C3*(Cb - delta), G = Y + C1*(Cr - delta) + C2*(Cb - delta),
//   R = Y + C0*(Cr - delta), delta = half of the channel range.
template <>
struct YCrCb2RGB_i<uchar>
{
    typedef uchar channel_type;

    // _dstcn   - number of destination channels (3 or 4)
    // _blueIdx - index of the blue channel in the output (0 for BGR, 2 for RGB)
    // _coeffs  - optional custom fixed-point coefficients; NULL selects the defaults
    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));

        v_c0 = _mm_set1_epi16((short)coeffs[0]);
        v_c1 = _mm_set1_epi16((short)coeffs[1]);
        v_c2 = _mm_set1_epi16((short)coeffs[2]);
        v_c3 = _mm_set1_epi16((short)coeffs[3]);
        v_delta = _mm_set1_epi16(ColorChannel<uchar>::half());
        v_delta2 = _mm_set1_epi32(1 << (yuv_shift - 1));  // rounding term for the descale shift
        v_zero = _mm_setzero_si128();

        uchar alpha = ColorChannel<uchar>::max();
        v_alpha = _mm_set1_epi8(*(char *)&alpha);

        // The 16-bit multiply path is valid only while the coefficients fit into int16.
        useSSE = coeffs[0] <= std::numeric_limits<short>::max();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // 16s x 8
    // Converts 8 pixels held as 16-bit lanes. mullo/mulhi pairs are unpacked
    // together to reconstruct the full 32-bit products before descaling.
    void process(__m128i v_y, __m128i v_cr, __m128i v_cb,
                 __m128i & v_r, __m128i & v_g, __m128i & v_b) const
    {
        v_cr = _mm_sub_epi16(v_cr, v_delta);
        v_cb = _mm_sub_epi16(v_cb, v_delta);

        __m128i v_y_p = _mm_unpacklo_epi16(v_y, v_zero);

        __m128i v_mullo_3 = _mm_mullo_epi16(v_cb, v_c3);
        __m128i v_mullo_2 = _mm_mullo_epi16(v_cb, v_c2);
        __m128i v_mullo_1 = _mm_mullo_epi16(v_cr, v_c1);
        __m128i v_mullo_0 = _mm_mullo_epi16(v_cr, v_c0);

        __m128i v_mulhi_3 = _mm_mulhi_epi16(v_cb, v_c3);
        __m128i v_mulhi_2 = _mm_mulhi_epi16(v_cb, v_c2);
        __m128i v_mulhi_1 = _mm_mulhi_epi16(v_cr, v_c1);
        __m128i v_mulhi_0 = _mm_mulhi_epi16(v_cr, v_c0);

        // Low 4 lanes: descale the 32-bit products, then add Y.
        __m128i v_b0 = _mm_srai_epi32(_mm_add_epi32(_mm_unpacklo_epi16(v_mullo_3, v_mulhi_3), v_delta2), yuv_shift);
        __m128i v_g0 = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(_mm_unpacklo_epi16(v_mullo_2, v_mulhi_2),
                                                                  _mm_unpacklo_epi16(v_mullo_1, v_mulhi_1)), v_delta2),
                                      yuv_shift);
        __m128i v_r0 = _mm_srai_epi32(_mm_add_epi32(_mm_unpacklo_epi16(v_mullo_0, v_mulhi_0), v_delta2), yuv_shift);

        v_r0 = _mm_add_epi32(v_r0, v_y_p);
        v_g0 = _mm_add_epi32(v_g0, v_y_p);
        v_b0 = _mm_add_epi32(v_b0, v_y_p);

        // High 4 lanes.
        v_y_p = _mm_unpackhi_epi16(v_y, v_zero);

        __m128i v_b1 = _mm_srai_epi32(_mm_add_epi32(_mm_unpackhi_epi16(v_mullo_3, v_mulhi_3), v_delta2), yuv_shift);
        __m128i v_g1 = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(_mm_unpackhi_epi16(v_mullo_2, v_mulhi_2),
                                                                  _mm_unpackhi_epi16(v_mullo_1, v_mulhi_1)), v_delta2),
                                      yuv_shift);
        __m128i v_r1 = _mm_srai_epi32(_mm_add_epi32(_mm_unpackhi_epi16(v_mullo_0, v_mulhi_0), v_delta2), yuv_shift);

        v_r1 = _mm_add_epi32(v_r1, v_y_p);
        v_g1 = _mm_add_epi32(v_g1, v_y_p);
        v_b1 = _mm_add_epi32(v_b1, v_y_p);

        // Signed-saturating pack back to 16-bit lanes.
        v_r = _mm_packs_epi32(v_r0, v_r1);
        v_g = _mm_packs_epi32(v_g0, v_g1);
        v_b = _mm_packs_epi32(v_b0, v_b1);
    }

    // src: interleaved Y,Cr,Cb triples; dst: interleaved dcn-channel pixels; n: pixel count.
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;  // work in interleaved source elements from here on

        if (haveSIMD && useSSE)
        {
            // Vector loop: 32 pixels (96 source bytes) per iteration.
            for ( ; i <= n - 96; i += 96, dst += dcn * 32)
            {
                __m128i v_y0 = _mm_loadu_si128((__m128i const *)(src + i));
                __m128i v_y1 = _mm_loadu_si128((__m128i const *)(src + i + 16));
                __m128i v_cr0 = _mm_loadu_si128((__m128i const *)(src + i + 32));
                __m128i v_cr1 = _mm_loadu_si128((__m128i const *)(src + i + 48));
                __m128i v_cb0 = _mm_loadu_si128((__m128i const *)(src + i + 64));
                __m128i v_cb1 = _mm_loadu_si128((__m128i const *)(src + i + 80));

                // Split the interleaved triples into planar Y / Cr / Cb registers.
                _mm_deinterleave_epi8(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);

                // Expand each 8-bit half to 16-bit and convert 8 pixels at a time.
                __m128i v_r_0 = v_zero, v_g_0 = v_zero, v_b_0 = v_zero;
                process(_mm_unpacklo_epi8(v_y0, v_zero),
                        _mm_unpacklo_epi8(v_cr0, v_zero),
                        _mm_unpacklo_epi8(v_cb0, v_zero),
                        v_r_0, v_g_0, v_b_0);

                __m128i v_r_1 = v_zero, v_g_1 = v_zero, v_b_1 = v_zero;
                process(_mm_unpackhi_epi8(v_y0, v_zero),
                        _mm_unpackhi_epi8(v_cr0, v_zero),
                        _mm_unpackhi_epi8(v_cb0, v_zero),
                        v_r_1, v_g_1, v_b_1);

                __m128i v_r0 = _mm_packus_epi16(v_r_0, v_r_1);
                __m128i v_g0 = _mm_packus_epi16(v_g_0, v_g_1);
                __m128i v_b0 = _mm_packus_epi16(v_b_0, v_b_1);

                process(_mm_unpacklo_epi8(v_y1, v_zero),
                        _mm_unpacklo_epi8(v_cr1, v_zero),
                        _mm_unpacklo_epi8(v_cb1, v_zero),
                        v_r_0, v_g_0, v_b_0);

                process(_mm_unpackhi_epi8(v_y1, v_zero),
                        _mm_unpackhi_epi8(v_cr1, v_zero),
                        _mm_unpackhi_epi8(v_cb1, v_zero),
                        v_r_1, v_g_1, v_b_1);

                __m128i v_r1 = _mm_packus_epi16(v_r_0, v_r_1);
                __m128i v_g1 = _mm_packus_epi16(v_g_0, v_g_1);
                __m128i v_b1 = _mm_packus_epi16(v_b_0, v_b_1);

                // BGR output: swap the red and blue planes before interleaving.
                if (bidx == 0)
                {
                    std::swap(v_r0, v_b0);
                    std::swap(v_r1, v_b1);
                }

                __m128i v_a0 = v_alpha, v_a1 = v_alpha;

                if (dcn == 3)
                    _mm_interleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);
                else
                    _mm_interleave_epi8(v_r0, v_r1, v_g0, v_g1,
                                        v_b0, v_b1, v_a0, v_a1);

                _mm_storeu_si128((__m128i *)(dst), v_r0);
                _mm_storeu_si128((__m128i *)(dst + 16), v_r1);
                _mm_storeu_si128((__m128i *)(dst + 32), v_g0);
                _mm_storeu_si128((__m128i *)(dst + 48), v_g1);
                _mm_storeu_si128((__m128i *)(dst + 64), v_b0);
                _mm_storeu_si128((__m128i *)(dst + 80), v_b1);

                if (dcn == 4)
                {
                    _mm_storeu_si128((__m128i *)(dst + 96), v_a0);
                    _mm_storeu_si128((__m128i *)(dst + 112), v_a1);
                }
            }
        }

        // Scalar tail (also the whole loop when SSE2 is unavailable or unusable).
        for ( ; i < n; i += 3, dst += dcn)
        {
            uchar Y = src[i];
            uchar Cr = src[i+1];
            uchar Cb = src[i+2];

            int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
            int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
            int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);

            dst[bidx] = saturate_cast<uchar>(b);
            dst[1] = saturate_cast<uchar>(g);
            dst[bidx^2] = saturate_cast<uchar>(r);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[4];
    bool useSSE, haveSIMD;

    __m128i v_c0, v_c1, v_c2, v_c3, v_delta2;
    __m128i v_delta, v_alpha, v_zero;
};
3113
3114 #endif // CV_SSE2
3115
3116 ////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
3117
// Row-major 3x3 matrix converting linear sRGB (R,G,B order) to CIE XYZ
// under the D65 reference white.
static const float sRGB2XYZ_D65[] =
{
    0.412453f, 0.357580f, 0.180423f,
    0.212671f, 0.715160f, 0.072169f,
    0.019334f, 0.119193f, 0.950227f
};
3124
// Row-major 3x3 inverse matrix converting CIE XYZ back to linear sRGB
// (B,G,R rows are produced by swapping rows in the converters below).
static const float XYZ2sRGB_D65[] =
{
    3.240479f, -1.53715f, -0.498535f,
    -0.969256f, 1.875991f, 0.041556f,
    0.055648f, -0.204043f, 1.057311f
};
3131
3132 template<typename _Tp> struct RGB2XYZ_f
3133 {
3134 typedef _Tp channel_type;
3135
RGB2XYZ_fcv::RGB2XYZ_f3136 RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
3137 {
3138 memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
3139 if(blueIdx == 0)
3140 {
3141 std::swap(coeffs[0], coeffs[2]);
3142 std::swap(coeffs[3], coeffs[5]);
3143 std::swap(coeffs[6], coeffs[8]);
3144 }
3145 }
operator ()cv::RGB2XYZ_f3146 void operator()(const _Tp* src, _Tp* dst, int n) const
3147 {
3148 int scn = srccn;
3149 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3150 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3151 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
3152
3153 n *= 3;
3154 for(int i = 0; i < n; i += 3, src += scn)
3155 {
3156 _Tp X = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
3157 _Tp Y = saturate_cast<_Tp>(src[0]*C3 + src[1]*C4 + src[2]*C5);
3158 _Tp Z = saturate_cast<_Tp>(src[0]*C6 + src[1]*C7 + src[2]*C8);
3159 dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
3160 }
3161 }
3162 int srccn;
3163 float coeffs[9];
3164 };
3165
3166 #if CV_NEON
3167
// NEON-accelerated RGB -> CIE XYZ conversion for float channels.
template <>
struct RGB2XYZ_f<float>
{
    typedef float channel_type;

    // _srccn  - number of source channels (3 or 4; alpha is ignored)
    // blueIdx - 0 means BGR-ordered input, so the R/B matrix columns are swapped
    // _coeffs - optional custom 3x3 matrix; NULL selects the sRGB/D65 matrix
    RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }

        // Broadcast the matrix into NEON registers once.
        v_c0 = vdupq_n_f32(coeffs[0]);
        v_c1 = vdupq_n_f32(coeffs[1]);
        v_c2 = vdupq_n_f32(coeffs[2]);
        v_c3 = vdupq_n_f32(coeffs[3]);
        v_c4 = vdupq_n_f32(coeffs[4]);
        v_c5 = vdupq_n_f32(coeffs[5]);
        v_c6 = vdupq_n_f32(coeffs[6]);
        v_c7 = vdupq_n_f32(coeffs[7]);
        v_c8 = vdupq_n_f32(coeffs[8]);
    }

    // Converts n pixels; vector loops handle 4 pixels per iteration,
    // the scalar tail covers the remainder.
    void operator()(const float* src, float* dst, int n) const
    {
        int scn = srccn, i = 0;
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];

        n *= 3;  // work in interleaved destination elements

        if (scn == 3)
            for ( ; i <= n - 12; i += 12, src += 12)
            {
                // 3-channel input: deinterleaved load, matrix multiply via fused mla.
                float32x4x3_t v_src = vld3q_f32(src), v_dst;
                v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
                v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5);
                v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8);
                vst3q_f32(dst + i, v_dst);
            }
        else
            for ( ; i <= n - 12; i += 12, src += 16)
            {
                // 4-channel input: alpha (val[3]) is loaded but ignored.
                float32x4x4_t v_src = vld4q_f32(src);
                float32x4x3_t v_dst;
                v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
                v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5);
                v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8);
                vst3q_f32(dst + i, v_dst);
            }

        // Scalar tail.
        for ( ; i < n; i += 3, src += scn)
        {
            float X = saturate_cast<float>(src[0]*C0 + src[1]*C1 + src[2]*C2);
            float Y = saturate_cast<float>(src[0]*C3 + src[1]*C4 + src[2]*C5);
            float Z = saturate_cast<float>(src[0]*C6 + src[1]*C7 + src[2]*C8);
            dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
        }
    }

    int srccn;
    float coeffs[9];
    float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
};
3236
3237 #elif CV_SSE2
3238
// SSE2-accelerated RGB -> CIE XYZ conversion for float channels.
template <>
struct RGB2XYZ_f<float>
{
    typedef float channel_type;

    // _srccn  - number of source channels (3 or 4; alpha is ignored)
    // blueIdx - 0 means BGR-ordered input, so the R/B matrix columns are swapped
    // _coeffs - optional custom 3x3 matrix; NULL selects the sRGB/D65 matrix
    RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }

        // Broadcast the matrix into SSE registers once.
        v_c0 = _mm_set1_ps(coeffs[0]);
        v_c1 = _mm_set1_ps(coeffs[1]);
        v_c2 = _mm_set1_ps(coeffs[2]);
        v_c3 = _mm_set1_ps(coeffs[3]);
        v_c4 = _mm_set1_ps(coeffs[4]);
        v_c5 = _mm_set1_ps(coeffs[5]);
        v_c6 = _mm_set1_ps(coeffs[6]);
        v_c7 = _mm_set1_ps(coeffs[7]);
        v_c8 = _mm_set1_ps(coeffs[8]);

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Applies the 3x3 matrix to 4 pixels held in planar registers.
    void process(__m128 v_r, __m128 v_g, __m128 v_b,
                 __m128 & v_x, __m128 & v_y, __m128 & v_z) const
    {
        v_x = _mm_mul_ps(v_r, v_c0);
        v_x = _mm_add_ps(v_x, _mm_mul_ps(v_g, v_c1));
        v_x = _mm_add_ps(v_x, _mm_mul_ps(v_b, v_c2));

        v_y = _mm_mul_ps(v_r, v_c3);
        v_y = _mm_add_ps(v_y, _mm_mul_ps(v_g, v_c4));
        v_y = _mm_add_ps(v_y, _mm_mul_ps(v_b, v_c5));

        v_z = _mm_mul_ps(v_r, v_c6);
        v_z = _mm_add_ps(v_z, _mm_mul_ps(v_g, v_c7));
        v_z = _mm_add_ps(v_z, _mm_mul_ps(v_b, v_c8));
    }

    // Converts n pixels; the vector loop handles 8 pixels per iteration,
    // the scalar tail covers the remainder.
    void operator()(const float* src, float* dst, int n) const
    {
        int scn = srccn, i = 0;
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];

        n *= 3;  // work in interleaved destination elements

        if (haveSIMD)
        {
            for ( ; i <= n - 24; i += 24, src += 8 * scn)
            {
                __m128 v_r0 = _mm_loadu_ps(src);
                __m128 v_r1 = _mm_loadu_ps(src + 4);
                __m128 v_g0 = _mm_loadu_ps(src + 8);
                __m128 v_g1 = _mm_loadu_ps(src + 12);
                __m128 v_b0 = _mm_loadu_ps(src + 16);
                __m128 v_b1 = _mm_loadu_ps(src + 20);

                // Split interleaved pixels into planar R/G/B (alpha discarded).
                if (scn == 4)
                {
                    __m128 v_a0 = _mm_loadu_ps(src + 24);
                    __m128 v_a1 = _mm_loadu_ps(src + 28);

                    _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1,
                                        v_b0, v_b1, v_a0, v_a1);
                }
                else
                    _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128 v_x0, v_y0, v_z0;
                process(v_r0, v_g0, v_b0,
                        v_x0, v_y0, v_z0);

                __m128 v_x1, v_y1, v_z1;
                process(v_r1, v_g1, v_b1,
                        v_x1, v_y1, v_z1);

                // Re-interleave planar X/Y/Z into packed triples and store.
                _mm_interleave_ps(v_x0, v_x1, v_y0, v_y1, v_z0, v_z1);

                _mm_storeu_ps(dst + i, v_x0);
                _mm_storeu_ps(dst + i + 4, v_x1);
                _mm_storeu_ps(dst + i + 8, v_y0);
                _mm_storeu_ps(dst + i + 12, v_y1);
                _mm_storeu_ps(dst + i + 16, v_z0);
                _mm_storeu_ps(dst + i + 20, v_z1);
            }
        }

        // Scalar tail (also the whole loop when SSE2 is unavailable).
        for ( ; i < n; i += 3, src += scn)
        {
            float X = saturate_cast<float>(src[0]*C0 + src[1]*C1 + src[2]*C2);
            float Y = saturate_cast<float>(src[0]*C3 + src[1]*C4 + src[2]*C5);
            float Z = saturate_cast<float>(src[0]*C6 + src[1]*C7 + src[2]*C8);
            dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
        }
    }

    int srccn;
    float coeffs[9];
    __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    bool haveSIMD;
};
3347
3348
3349 #endif
3350
3351 template<typename _Tp> struct RGB2XYZ_i
3352 {
3353 typedef _Tp channel_type;
3354
RGB2XYZ_icv::RGB2XYZ_i3355 RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
3356 {
3357 static const int coeffs0[] =
3358 {
3359 1689, 1465, 739,
3360 871, 2929, 296,
3361 79, 488, 3892
3362 };
3363 for( int i = 0; i < 9; i++ )
3364 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
3365 if(blueIdx == 0)
3366 {
3367 std::swap(coeffs[0], coeffs[2]);
3368 std::swap(coeffs[3], coeffs[5]);
3369 std::swap(coeffs[6], coeffs[8]);
3370 }
3371 }
operator ()cv::RGB2XYZ_i3372 void operator()(const _Tp* src, _Tp* dst, int n) const
3373 {
3374 int scn = srccn;
3375 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3376 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3377 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
3378 n *= 3;
3379
3380 for(int i = 0; i < n; i += 3, src += scn)
3381 {
3382 int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
3383 int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
3384 int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
3385 dst[i] = saturate_cast<_Tp>(X); dst[i+1] = saturate_cast<_Tp>(Y);
3386 dst[i+2] = saturate_cast<_Tp>(Z);
3387 }
3388 }
3389 int srccn;
3390 int coeffs[9];
3391 };
3392
3393 #if CV_NEON
3394
// NEON-accelerated fixed-point RGB -> CIE XYZ conversion for 8-bit channels.
template <>
struct RGB2XYZ_i<uchar>
{
    typedef uchar channel_type;

    // _srccn  - number of source channels (3 or 4; alpha is ignored)
    // blueIdx - 0 means BGR-ordered input, so the R/B matrix columns are swapped
    // _coeffs - optional float matrix, scaled here by 2^xyz_shift; NULL selects defaults
    RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        static const int coeffs0[] =
        {
            1689, 1465, 739,
            871, 2929, 296,
            79, 488, 3892
        };
        for( int i = 0; i < 9; i++ )
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }

        // Broadcast the (non-negative here) coefficients as u16 lanes.
        v_c0 = vdup_n_u16(coeffs[0]);
        v_c1 = vdup_n_u16(coeffs[1]);
        v_c2 = vdup_n_u16(coeffs[2]);
        v_c3 = vdup_n_u16(coeffs[3]);
        v_c4 = vdup_n_u16(coeffs[4]);
        v_c5 = vdup_n_u16(coeffs[5]);
        v_c6 = vdup_n_u16(coeffs[6]);
        v_c7 = vdup_n_u16(coeffs[7]);
        v_c8 = vdup_n_u16(coeffs[8]);
        v_delta = vdupq_n_u32(1 << (xyz_shift - 1));  // rounding term for the descale shift
    }

    // Converts n pixels; the vector loop handles 8 pixels per iteration.
    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int scn = srccn, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;  // work in interleaved destination elements

        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint8x8x3_t v_dst;
            uint16x8x3_t v_src16;

            // Load 8 pixels and widen the first three channels to u16.
            if (scn == 3)
            {
                uint8x8x3_t v_src = vld3_u8(src);
                v_src16.val[0] = vmovl_u8(v_src.val[0]);
                v_src16.val[1] = vmovl_u8(v_src.val[1]);
                v_src16.val[2] = vmovl_u8(v_src.val[2]);
            }
            else
            {
                uint8x8x4_t v_src = vld4_u8(src);  // alpha (val[3]) is discarded
                v_src16.val[0] = vmovl_u8(v_src.val[0]);
                v_src16.val[1] = vmovl_u8(v_src.val[1]);
                v_src16.val[2] = vmovl_u8(v_src.val[2]);
            }

            // Low 4 lanes: widening multiply-accumulate into u32, then descale.
            uint16x4_t v_s0 = vget_low_u16(v_src16.val[0]),
                       v_s1 = vget_low_u16(v_src16.val[1]),
                       v_s2 = vget_low_u16(v_src16.val[2]);

            uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift);
            v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift);
            v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift);

            // High 4 lanes.
            v_s0 = vget_high_u16(v_src16.val[0]),
            v_s1 = vget_high_u16(v_src16.val[1]),
            v_s2 = vget_high_u16(v_src16.val[2]);

            uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift);
            v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift);
            v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift);

            // Narrow u32 -> u16 -> u8 (final step saturates) and store interleaved.
            v_dst.val[0] = vqmovn_u16(vcombine_u16(vmovn_u32(v_X0), vmovn_u32(v_X1)));
            v_dst.val[1] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Y0), vmovn_u32(v_Y1)));
            v_dst.val[2] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Z0), vmovn_u32(v_Z1)));

            vst3_u8(dst + i, v_dst);
        }

        // Scalar tail.
        for ( ; i < n; i += 3, src += scn)
        {
            int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
            int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
            int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
            dst[i] = saturate_cast<uchar>(X);
            dst[i+1] = saturate_cast<uchar>(Y);
            dst[i+2] = saturate_cast<uchar>(Z);
        }
    }

    int srccn, coeffs[9];
    uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    uint32x4_t v_delta;
};
3500
// NEON-accelerated fixed-point RGB -> CIE XYZ conversion for 16-bit channels.
template <>
struct RGB2XYZ_i<ushort>
{
    typedef ushort channel_type;

    // _srccn  - number of source channels (3 or 4; alpha is ignored)
    // blueIdx - 0 means BGR-ordered input, so the R/B matrix columns are swapped
    // _coeffs - optional float matrix, scaled here by 2^xyz_shift; NULL selects defaults
    RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        static const int coeffs0[] =
        {
            1689, 1465, 739,
            871, 2929, 296,
            79, 488, 3892
        };
        for( int i = 0; i < 9; i++ )
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }

        // Broadcast the coefficients as u16 lanes for widening multiplies.
        v_c0 = vdup_n_u16(coeffs[0]);
        v_c1 = vdup_n_u16(coeffs[1]);
        v_c2 = vdup_n_u16(coeffs[2]);
        v_c3 = vdup_n_u16(coeffs[3]);
        v_c4 = vdup_n_u16(coeffs[4]);
        v_c5 = vdup_n_u16(coeffs[5]);
        v_c6 = vdup_n_u16(coeffs[6]);
        v_c7 = vdup_n_u16(coeffs[7]);
        v_c8 = vdup_n_u16(coeffs[8]);
        v_delta = vdupq_n_u32(1 << (xyz_shift - 1));  // rounding term for the descale shift
    }

    // Converts n pixels; 8-pixel main loop, 4-pixel secondary loop, scalar tail.
    void operator()(const ushort * src, ushort * dst, int n) const
    {
        int scn = srccn, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;  // work in interleaved destination elements

        // Main loop: 8 pixels per iteration.
        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint16x8x3_t v_src, v_dst;

            if (scn == 3)
                v_src = vld3q_u16(src);
            else
            {
                uint16x8x4_t v_src4 = vld4q_u16(src);  // alpha (val[3]) is discarded
                v_src.val[0] = v_src4.val[0];
                v_src.val[1] = v_src4.val[1];
                v_src.val[2] = v_src4.val[2];
            }

            // Low 4 lanes: widening multiply-accumulate into u32, then descale.
            uint16x4_t v_s0 = vget_low_u16(v_src.val[0]),
                       v_s1 = vget_low_u16(v_src.val[1]),
                       v_s2 = vget_low_u16(v_src.val[2]);

            uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift);
            v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift);
            v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift);

            // High 4 lanes.
            v_s0 = vget_high_u16(v_src.val[0]),
            v_s1 = vget_high_u16(v_src.val[1]),
            v_s2 = vget_high_u16(v_src.val[2]);

            uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift);
            v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift);
            v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift);

            // Saturating narrow u32 -> u16 and store interleaved.
            v_dst.val[0] = vcombine_u16(vqmovn_u32(v_X0), vqmovn_u32(v_X1));
            v_dst.val[1] = vcombine_u16(vqmovn_u32(v_Y0), vqmovn_u32(v_Y1));
            v_dst.val[2] = vcombine_u16(vqmovn_u32(v_Z0), vqmovn_u32(v_Z1));

            vst3q_u16(dst + i, v_dst);
        }

        // Secondary loop: 4 pixels per iteration.
        for ( ; i <= n - 12; i += 12, src += scn * 4)
        {
            uint16x4x3_t v_dst;
            uint16x4_t v_s0, v_s1, v_s2;

            if (scn == 3)
            {
                uint16x4x3_t v_src = vld3_u16(src);
                v_s0 = v_src.val[0];
                v_s1 = v_src.val[1];
                v_s2 = v_src.val[2];
            }
            else
            {
                uint16x4x4_t v_src = vld4_u16(src);  // alpha (val[3]) is discarded
                v_s0 = v_src.val[0];
                v_s1 = v_src.val[1];
                v_s2 = v_src.val[2];
            }

            uint32x4_t v_X = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);

            v_dst.val[0] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_X, v_delta), xyz_shift));
            v_dst.val[1] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Y, v_delta), xyz_shift));
            v_dst.val[2] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Z, v_delta), xyz_shift));

            vst3_u16(dst + i, v_dst);
        }

        // Scalar tail.
        for ( ; i < n; i += 3, src += scn)
        {
            int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
            int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
            int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
            dst[i] = saturate_cast<ushort>(X);
            dst[i+1] = saturate_cast<ushort>(Y);
            dst[i+2] = saturate_cast<ushort>(Z);
        }
    }

    int srccn, coeffs[9];
    uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    uint32x4_t v_delta;
};
3632
3633 #endif
3634
3635 template<typename _Tp> struct XYZ2RGB_f
3636 {
3637 typedef _Tp channel_type;
3638
XYZ2RGB_fcv::XYZ2RGB_f3639 XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
3640 : dstcn(_dstcn), blueIdx(_blueIdx)
3641 {
3642 memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));
3643 if(blueIdx == 0)
3644 {
3645 std::swap(coeffs[0], coeffs[6]);
3646 std::swap(coeffs[1], coeffs[7]);
3647 std::swap(coeffs[2], coeffs[8]);
3648 }
3649 }
3650
operator ()cv::XYZ2RGB_f3651 void operator()(const _Tp* src, _Tp* dst, int n) const
3652 {
3653 int dcn = dstcn;
3654 _Tp alpha = ColorChannel<_Tp>::max();
3655 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3656 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3657 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
3658 n *= 3;
3659 for(int i = 0; i < n; i += 3, dst += dcn)
3660 {
3661 _Tp B = saturate_cast<_Tp>(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2);
3662 _Tp G = saturate_cast<_Tp>(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5);
3663 _Tp R = saturate_cast<_Tp>(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8);
3664 dst[0] = B; dst[1] = G; dst[2] = R;
3665 if( dcn == 4 )
3666 dst[3] = alpha;
3667 }
3668 }
3669 int dstcn, blueIdx;
3670 float coeffs[9];
3671 };
3672
3673 #if CV_SSE2
3674
// SSE2-accelerated CIE XYZ -> RGB/RGBA conversion for float channels.
template <>
struct XYZ2RGB_f<float>
{
    typedef float channel_type;

    // _dstcn   - number of destination channels (3 or 4)
    // _blueIdx - 0 means BGR-ordered output, so the B/R matrix rows are swapped
    // _coeffs  - optional custom inverse matrix; NULL selects XYZ -> sRGB (D65)
    XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[6]);
            std::swap(coeffs[1], coeffs[7]);
            std::swap(coeffs[2], coeffs[8]);
        }

        // Broadcast the matrix into SSE registers once.
        v_c0 = _mm_set1_ps(coeffs[0]);
        v_c1 = _mm_set1_ps(coeffs[1]);
        v_c2 = _mm_set1_ps(coeffs[2]);
        v_c3 = _mm_set1_ps(coeffs[3]);
        v_c4 = _mm_set1_ps(coeffs[4]);
        v_c5 = _mm_set1_ps(coeffs[5]);
        v_c6 = _mm_set1_ps(coeffs[6]);
        v_c7 = _mm_set1_ps(coeffs[7]);
        v_c8 = _mm_set1_ps(coeffs[8]);

        v_alpha = _mm_set1_ps(ColorChannel<float>::max());

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Applies the inverse matrix to 4 pixels in planar registers.
    // NOTE(review): outputs follow row order after the ctor's possible swap -
    // here v_b receives rows 0-2 and v_r rows 6-8, matching the call sites.
    void process(__m128 v_x, __m128 v_y, __m128 v_z,
                 __m128 & v_r, __m128 & v_g, __m128 & v_b) const
    {
        v_b = _mm_mul_ps(v_x, v_c0);
        v_b = _mm_add_ps(v_b, _mm_mul_ps(v_y, v_c1));
        v_b = _mm_add_ps(v_b, _mm_mul_ps(v_z, v_c2));

        v_g = _mm_mul_ps(v_x, v_c3);
        v_g = _mm_add_ps(v_g, _mm_mul_ps(v_y, v_c4));
        v_g = _mm_add_ps(v_g, _mm_mul_ps(v_z, v_c5));

        v_r = _mm_mul_ps(v_x, v_c6);
        v_r = _mm_add_ps(v_r, _mm_mul_ps(v_y, v_c7));
        v_r = _mm_add_ps(v_r, _mm_mul_ps(v_z, v_c8));
    }

    // Converts n pixels; the vector loop handles 8 pixels per iteration,
    // the scalar tail covers the remainder.
    void operator()(const float* src, float* dst, int n) const
    {
        int dcn = dstcn;
        float alpha = ColorChannel<float>::max();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;  // work in interleaved source elements
        int i = 0;

        if (haveSIMD)
        {
            for ( ; i <= n - 24; i += 24, dst += 8 * dcn)
            {
                __m128 v_x0 = _mm_loadu_ps(src + i);
                __m128 v_x1 = _mm_loadu_ps(src + i + 4);
                __m128 v_y0 = _mm_loadu_ps(src + i + 8);
                __m128 v_y1 = _mm_loadu_ps(src + i + 12);
                __m128 v_z0 = _mm_loadu_ps(src + i + 16);
                __m128 v_z1 = _mm_loadu_ps(src + i + 20);

                // Split interleaved X,Y,Z triples into planar registers.
                _mm_deinterleave_ps(v_x0, v_x1, v_y0, v_y1, v_z0, v_z1);

                __m128 v_r0, v_g0, v_b0;
                process(v_x0, v_y0, v_z0,
                        v_r0, v_g0, v_b0);

                __m128 v_r1, v_g1, v_b1;
                process(v_x1, v_y1, v_z1,
                        v_r1, v_g1, v_b1);

                __m128 v_a0 = v_alpha, v_a1 = v_alpha;

                // Re-interleave to packed pixels (B first, matching dst[0] = B below).
                if (dcn == 4)
                    _mm_interleave_ps(v_b0, v_b1, v_g0, v_g1,
                                      v_r0, v_r1, v_a0, v_a1);
                else
                    _mm_interleave_ps(v_b0, v_b1, v_g0, v_g1, v_r0, v_r1);

                _mm_storeu_ps(dst, v_b0);
                _mm_storeu_ps(dst + 4, v_b1);
                _mm_storeu_ps(dst + 8, v_g0);
                _mm_storeu_ps(dst + 12, v_g1);
                _mm_storeu_ps(dst + 16, v_r0);
                _mm_storeu_ps(dst + 20, v_r1);

                if (dcn == 4)
                {
                    _mm_storeu_ps(dst + 24, v_a0);
                    _mm_storeu_ps(dst + 28, v_a1);
                }
            }

        }

        // Scalar tail (also the whole loop when SSE2 is unavailable).
        for( ; i < n; i += 3, dst += dcn)
        {
            float B = src[i]*C0 + src[i+1]*C1 + src[i+2]*C2;
            float G = src[i]*C3 + src[i+1]*C4 + src[i+2]*C5;
            float R = src[i]*C6 + src[i+1]*C7 + src[i+2]*C8;
            dst[0] = B; dst[1] = G; dst[2] = R;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    float coeffs[9];

    __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    __m128 v_alpha;
    bool haveSIMD;
};
3794
3795 #endif // CV_SSE2
3796
3797
3798 template<typename _Tp> struct XYZ2RGB_i
3799 {
3800 typedef _Tp channel_type;
3801
XYZ2RGB_icv::XYZ2RGB_i3802 XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
3803 : dstcn(_dstcn), blueIdx(_blueIdx)
3804 {
3805 static const int coeffs0[] =
3806 {
3807 13273, -6296, -2042,
3808 -3970, 7684, 170,
3809 228, -836, 4331
3810 };
3811 for(int i = 0; i < 9; i++)
3812 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
3813
3814 if(blueIdx == 0)
3815 {
3816 std::swap(coeffs[0], coeffs[6]);
3817 std::swap(coeffs[1], coeffs[7]);
3818 std::swap(coeffs[2], coeffs[8]);
3819 }
3820 }
operator ()cv::XYZ2RGB_i3821 void operator()(const _Tp* src, _Tp* dst, int n) const
3822 {
3823 int dcn = dstcn;
3824 _Tp alpha = ColorChannel<_Tp>::max();
3825 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
3826 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
3827 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
3828 n *= 3;
3829 for(int i = 0; i < n; i += 3, dst += dcn)
3830 {
3831 int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
3832 int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
3833 int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
3834 dst[0] = saturate_cast<_Tp>(B); dst[1] = saturate_cast<_Tp>(G);
3835 dst[2] = saturate_cast<_Tp>(R);
3836 if( dcn == 4 )
3837 dst[3] = alpha;
3838 }
3839 }
3840 int dstcn, blueIdx;
3841 int coeffs[9];
3842 };
3843
3844 #if CV_NEON
3845
template <>
struct XYZ2RGB_i<uchar>
{
    typedef uchar channel_type;

    // NEON specialization: 8-bit XYZ -> BGR/BGRA in Q(xyz_shift) fixed
    // point, processing 8 pixels per vector iteration.
    XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
    : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // Default sRGB/D65 inverse matrix, pre-scaled by 2^xyz_shift.
        static const int coeffs0[] =
        {
            13273, -6296, -2042,
            -3970, 7684, 170,
            228, -836, 4331
        };
        for(int i = 0; i < 9; i++)
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];

        // blueIdx == 0 -> RGB output order: exchange B and R matrix rows.
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[6]);
            std::swap(coeffs[1], coeffs[7]);
            std::swap(coeffs[2], coeffs[8]);
        }

        // Broadcast matrix entries, rounding bias and alpha once, up front.
        v_c0 = vdup_n_s16(coeffs[0]);
        v_c1 = vdup_n_s16(coeffs[1]);
        v_c2 = vdup_n_s16(coeffs[2]);
        v_c3 = vdup_n_s16(coeffs[3]);
        v_c4 = vdup_n_s16(coeffs[4]);
        v_c5 = vdup_n_s16(coeffs[5]);
        v_c6 = vdup_n_s16(coeffs[6]);
        v_c7 = vdup_n_s16(coeffs[7]);
        v_c8 = vdup_n_s16(coeffs[8]);
        v_delta = vdupq_n_s32(1 << (xyz_shift - 1));
        v_alpha = vmovn_u16(vdupq_n_u16(ColorChannel<uchar>::max()));
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int dcn = dstcn, i = 0;
        uchar alpha = ColorChannel<uchar>::max();
        // Scalar matrix copies for the tail loop.
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;

        // Vector loop: 8 pixels (24 bytes) per iteration.
        for ( ; i <= n - 24; i += 24, dst += dcn * 8)
        {
            // De-interleaving load of 8 (X,Y,Z) triplets, widened to s16.
            uint8x8x3_t v_src = vld3_u8(src + i);
            int16x8x3_t v_src16;
            v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
            v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
            v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));

            // Low half: widening multiply-accumulate of row dot products,
            // then round (add delta) and shift out of fixed point.
            int16x4_t v_s0 = vget_low_s16(v_src16.val[0]),
                      v_s1 = vget_low_s16(v_src16.val[1]),
                      v_s2 = vget_low_s16(v_src16.val[2]);

            int32x4_t v_X0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift);
            v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift);
            v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift);

            // High half, same computation for the other 4 pixels.
            v_s0 = vget_high_s16(v_src16.val[0]),
            v_s1 = vget_high_s16(v_src16.val[1]),
            v_s2 = vget_high_s16(v_src16.val[2]);

            int32x4_t v_X1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift);
            v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift);
            v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift);

            // Saturating narrow s32 -> s16 -> u8; row 0/1/2 of the matrix
            // produce the B/G/R channels respectively.
            uint8x8_t v_b = vqmovun_s16(vcombine_s16(vqmovn_s32(v_X0), vqmovn_s32(v_X1)));
            uint8x8_t v_g = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
            uint8x8_t v_r = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Z0), vqmovn_s32(v_Z1)));

            if (dcn == 3)
            {
                uint8x8x3_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                vst3_u8(dst, v_dst);
            }
            else
            {
                uint8x8x4_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                v_dst.val[3] = v_alpha;
                vst4_u8(dst, v_dst);
            }
        }

        // Scalar tail for the remaining (< 8) pixels.
        for ( ; i < n; i += 3, dst += dcn)
        {
            int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
            int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
            int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
            dst[0] = saturate_cast<uchar>(B); dst[1] = saturate_cast<uchar>(G);
            dst[2] = saturate_cast<uchar>(R);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[9];

    int16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    uint8x8_t v_alpha;
    int32x4_t v_delta;
};
3963
template <>
struct XYZ2RGB_i<ushort>
{
    typedef ushort channel_type;

    // NEON specialization: 16-bit XYZ -> BGR/BGRA in Q(xyz_shift) fixed
    // point; a main loop of 8 pixels plus a 4-pixel vector tail.
    XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
    : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // Default sRGB/D65 inverse matrix, pre-scaled by 2^xyz_shift.
        static const int coeffs0[] =
        {
            13273, -6296, -2042,
            -3970, 7684, 170,
            228, -836, 4331
        };
        for(int i = 0; i < 9; i++)
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];

        // blueIdx == 0 -> RGB output order: exchange B and R matrix rows.
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[6]);
            std::swap(coeffs[1], coeffs[7]);
            std::swap(coeffs[2], coeffs[8]);
        }

        // 16-bit sources need 32-bit coefficient lanes (vmlaq_s32 path).
        v_c0 = vdupq_n_s32(coeffs[0]);
        v_c1 = vdupq_n_s32(coeffs[1]);
        v_c2 = vdupq_n_s32(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_c4 = vdupq_n_s32(coeffs[4]);
        v_c5 = vdupq_n_s32(coeffs[5]);
        v_c6 = vdupq_n_s32(coeffs[6]);
        v_c7 = vdupq_n_s32(coeffs[7]);
        v_c8 = vdupq_n_s32(coeffs[8]);
        v_delta = vdupq_n_s32(1 << (xyz_shift - 1));
        v_alpha = vdupq_n_u16(ColorChannel<ushort>::max());
        // Narrow alpha replica for the 4-pixel tail loop.
        v_alpha2 = vget_low_u16(v_alpha);
    }

    void operator()(const ushort* src, ushort* dst, int n) const
    {
        int dcn = dstcn, i = 0;
        ushort alpha = ColorChannel<ushort>::max();
        // Scalar matrix copies for the scalar tail loop.
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;

        // Main vector loop: 8 pixels (24 ushorts) per iteration.
        for ( ; i <= n - 24; i += 24, dst += dcn * 8)
        {
            // De-interleaving load, then widen the low 4 lanes to s32.
            uint16x8x3_t v_src = vld3q_u16(src + i);
            int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))),
                      v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))),
                      v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));

            // Row dot products, then round (add delta) and shift.
            int32x4_t v_X0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift);
            v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift);
            v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift);

            // Same computation for the high 4 lanes.
            v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0])));
            v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1])));
            v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));

            int32x4_t v_X1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift);
            v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift);
            v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift);

            // Saturating narrow s32 -> u16; matrix rows 0/1/2 yield B/G/R.
            uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_X0), vqmovun_s32(v_X1));
            uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1));
            uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_Z0), vqmovun_s32(v_Z1));

            if (dcn == 3)
            {
                uint16x8x3_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                vst3q_u16(dst, v_dst);
            }
            else
            {
                uint16x8x4_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                v_dst.val[3] = v_alpha;
                vst4q_u16(dst, v_dst);
            }
        }

        // Vector tail: 4 pixels (12 ushorts) per iteration.
        for ( ; i <= n - 12; i += 12, dst += dcn * 4)
        {
            uint16x4x3_t v_src = vld3_u16(src + i);
            int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])),
                      v_s1 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])),
                      v_s2 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));

            int32x4_t v_X = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X = vshrq_n_s32(vaddq_s32(v_X, v_delta), xyz_shift);
            v_Y = vshrq_n_s32(vaddq_s32(v_Y, v_delta), xyz_shift);
            v_Z = vshrq_n_s32(vaddq_s32(v_Z, v_delta), xyz_shift);

            uint16x4_t v_b = vqmovun_s32(v_X);
            uint16x4_t v_g = vqmovun_s32(v_Y);
            uint16x4_t v_r = vqmovun_s32(v_Z);

            if (dcn == 3)
            {
                uint16x4x3_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                vst3_u16(dst, v_dst);
            }
            else
            {
                uint16x4x4_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                v_dst.val[3] = v_alpha2;
                vst4_u16(dst, v_dst);
            }
        }

        // Scalar tail for the remaining (< 4) pixels.
        for ( ; i < n; i += 3, dst += dcn)
        {
            int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
            int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
            int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
            dst[0] = saturate_cast<ushort>(B); dst[1] = saturate_cast<ushort>(G);
            dst[2] = saturate_cast<ushort>(R);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[9];

    int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8, v_delta;
    uint16x4_t v_alpha2;
    uint16x8_t v_alpha;
};
4114
4115 #endif
4116
4117 ////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
4118
4119
struct RGB2HSV_b
{
    typedef uchar channel_type;

    RGB2HSV_b(int _srccn, int _blueIdx, int _hrange)
    : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange)
    {
        // Only the two OpenCV 8-bit hue conventions are supported:
        // H in [0,180) or the full-range variant H in [0,256).
        CV_Assert( hrange == 180 || hrange == 256 );
    }

    // 8-bit RGB -> HSV using fixed-point reciprocal tables so the per-pixel
    // loop contains no divisions.
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, bidx = blueIdx, scn = srccn;
        const int hsv_shift = 12;

        // sdiv_table[v]    ~ (255 << hsv_shift) / v       (saturation)
        // hdiv_table*[d]   ~ (hrange << hsv_shift) / (6d) (hue)
        static int sdiv_table[256];
        static int hdiv_table180[256];
        static int hdiv_table256[256];
        // NOTE(review): lazy init is guarded only by a volatile flag. The
        // table fill is idempotent, so a racing first call recomputes the
        // same values, but this is not formal synchronization -- confirm
        // this pattern is acceptable project-wide.
        static volatile bool initialized = false;

        int hr = hrange;
        const int* hdiv_table = hr == 180 ? hdiv_table180 : hdiv_table256;
        n *= 3;

        if( !initialized )
        {
            sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
            for( i = 1; i < 256; i++ )
            {
                sdiv_table[i] = saturate_cast<int>((255 << hsv_shift)/(1.*i));
                hdiv_table180[i] = saturate_cast<int>((180 << hsv_shift)/(6.*i));
                hdiv_table256[i] = saturate_cast<int>((256 << hsv_shift)/(6.*i));
            }
            initialized = true;
        }

        for( i = 0; i < n; i += 3, src += scn )
        {
            // bidx is 0 or 2, so bidx^2 addresses the opposite end channel.
            int b = src[bidx], g = src[1], r = src[bidx^2];
            int h, s, v = b;
            int vmin = b, diff;
            int vr, vg;

            // v = max(b,g,r), vmin = min(b,g,r) via branchless 8-bit macros.
            CV_CALC_MAX_8U( v, g );
            CV_CALC_MAX_8U( v, r );
            CV_CALC_MIN_8U( vmin, g );
            CV_CALC_MIN_8U( vmin, r );

            diff = v - vmin;
            // All-ones / all-zeros masks: which channel holds the maximum.
            vr = v == r ? -1 : 0;
            vg = v == g ? -1 : 0;

            // s = diff/v, rounded, via the reciprocal table.
            s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
            // Branchless sector selection: (g-b), (b-r)+2*diff or
            // (r-g)+4*diff depending on which channel is the maximum.
            h = (vr & (g - b)) +
                (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
            h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
            // Wrap negative hue into [0, hrange).
            h += h < 0 ? hr : 0;

            dst[i] = saturate_cast<uchar>(h);
            dst[i+1] = (uchar)s;
            dst[i+2] = (uchar)v;
        }
    }

    int srccn, blueIdx, hrange;
};
4186
4187
struct RGB2HSV_f
{
    typedef float channel_type;

    // Converts packed float RGB pixels to HSV. _srccn is the number of
    // source channels (>= 3), _blueIdx selects BGR (0) vs RGB (2) input
    // order, and _hrange is the full hue range (e.g. 360 for floats).
    RGB2HSV_f(int _srccn, int _blueIdx, float _hrange)
    : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {}

    void operator()(const float* src, float* dst, int n) const
    {
        const int bidx = blueIdx, scn = srccn;
        const float hscale = hrange*(1.f/360.f);
        n *= 3;

        for( int i = 0; i < n; i += 3, src += scn )
        {
            const float b = src[bidx], g = src[1], r = src[bidx^2];

            // Channel extrema; the comparison order mirrors the reference
            // scalar implementation exactly.
            float vmax = r, vmin = r;
            if( vmax < g ) vmax = g;
            if( vmax < b ) vmax = b;
            if( vmin > g ) vmin = g;
            if( vmin > b ) vmin = b;

            const float delta = vmax - vmin;
            // Epsilon terms keep the divisions finite for black/grey pixels.
            const float s = delta/(float)(fabs(vmax) + FLT_EPSILON);
            const float k = (float)(60./(delta + FLT_EPSILON));

            float h;
            if( vmax == r )
                h = (g - b)*k;
            else if( vmax == g )
                h = (b - r)*k + 120.f;
            else
                h = (r - g)*k + 240.f;

            if( h < 0 )
                h += 360.f;

            dst[i] = h*hscale;
            dst[i+1] = s;
            dst[i+2] = vmax;
        }
    }

    int srccn, blueIdx;
    float hrange;
};
4235
4236
4237 struct HSV2RGB_f
4238 {
4239 typedef float channel_type;
4240
HSV2RGB_fcv::HSV2RGB_f4241 HSV2RGB_f(int _dstcn, int _blueIdx, float _hrange)
4242 : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}
4243
operator ()cv::HSV2RGB_f4244 void operator()(const float* src, float* dst, int n) const
4245 {
4246 int i, bidx = blueIdx, dcn = dstcn;
4247 float _hscale = hscale;
4248 float alpha = ColorChannel<float>::max();
4249 n *= 3;
4250
4251 for( i = 0; i < n; i += 3, dst += dcn )
4252 {
4253 float h = src[i], s = src[i+1], v = src[i+2];
4254 float b, g, r;
4255
4256 if( s == 0 )
4257 b = g = r = v;
4258 else
4259 {
4260 static const int sector_data[][3]=
4261 {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
4262 float tab[4];
4263 int sector;
4264 h *= _hscale;
4265 if( h < 0 )
4266 do h += 6; while( h < 0 );
4267 else if( h >= 6 )
4268 do h -= 6; while( h >= 6 );
4269 sector = cvFloor(h);
4270 h -= sector;
4271 if( (unsigned)sector >= 6u )
4272 {
4273 sector = 0;
4274 h = 0.f;
4275 }
4276
4277 tab[0] = v;
4278 tab[1] = v*(1.f - s);
4279 tab[2] = v*(1.f - s*h);
4280 tab[3] = v*(1.f - s*(1.f - h));
4281
4282 b = tab[sector_data[sector][0]];
4283 g = tab[sector_data[sector][1]];
4284 r = tab[sector_data[sector][2]];
4285 }
4286
4287 dst[bidx] = b;
4288 dst[1] = g;
4289 dst[bidx^2] = r;
4290 if( dcn == 4 )
4291 dst[3] = alpha;
4292 }
4293 }
4294
4295 int dstcn, blueIdx;
4296 float hscale;
4297 };
4298
4299
struct HSV2RGB_b
{
    typedef uchar channel_type;

    // 8-bit HSV -> BGR/BGRA. Works in BLOCK_SIZE chunks: widens pixels to
    // float, delegates to the float converter (cvt), narrows back to uchar.
    HSV2RGB_b(int _dstcn, int _blueIdx, int _hrange)
    : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
    {
#if CV_NEON
        // Constants for the vector widen/narrow paths.
        v_scale_inv = vdupq_n_f32(1.f/255.f);
        v_scale = vdupq_n_f32(255.f);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
#elif CV_SSE2
        v_scale_inv = _mm_set1_ps(1.f/255.f);
        v_scale = _mm_set1_ps(255.0f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // 16s x 8
    // Widens 8 H,S,V 16-bit lanes to floats, normalizes S and V to [0,1]
    // (H stays unscaled: cvt consumes it in the original hue range), then
    // re-interleaves and stores the triplets into the scratch buffer.
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 float * buf) const
    {
        __m128 v_r0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_r, v_zero));
        __m128 v_g0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_g, v_zero));
        __m128 v_b0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_b, v_zero));

        __m128 v_r1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_r, v_zero));
        __m128 v_g1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_g, v_zero));
        __m128 v_b1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_b, v_zero));

        v_g0 = _mm_mul_ps(v_g0, v_scale_inv);
        v_b0 = _mm_mul_ps(v_b0, v_scale_inv);

        v_g1 = _mm_mul_ps(v_g1, v_scale_inv);
        v_b1 = _mm_mul_ps(v_b1, v_scale_inv);

        _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

        _mm_store_ps(buf, v_r0);
        _mm_store_ps(buf + 4, v_r1);
        _mm_store_ps(buf + 8, v_g0);
        _mm_store_ps(buf + 12, v_g1);
        _mm_store_ps(buf + 16, v_b0);
        _mm_store_ps(buf + 20, v_b1);
    }
#endif

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, dcn = dstcn;
        uchar alpha = ColorChannel<uchar>::max();
        // Aligned scratch holding one block of float HSV triplets.
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

            // Stage 1: uchar HSV -> float HSV in buf (H kept as-is,
            // S and V scaled by 1/255).
#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                uint8x8x3_t v_src = vld3_u8(src + j);
                uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
                           v_t1 = vmovl_u8(v_src.val[1]),
                           v_t2 = vmovl_u8(v_src.val[2]);

                float32x4x3_t v_dst;
                v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0)));
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0)));
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // 32 pixels (96 bytes) per iteration.
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j));
                    __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16));
                    __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32));
                    __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48));
                    __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + j + 64));
                    __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80));

                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                    // Widen u8 -> u16 and hand each group of 8 pixels off.
                    process(_mm_unpacklo_epi8(v_r0, v_zero),
                            _mm_unpacklo_epi8(v_g0, v_zero),
                            _mm_unpacklo_epi8(v_b0, v_zero),
                            buf + j);

                    process(_mm_unpackhi_epi8(v_r0, v_zero),
                            _mm_unpackhi_epi8(v_g0, v_zero),
                            _mm_unpackhi_epi8(v_b0, v_zero),
                            buf + j + 24);

                    process(_mm_unpacklo_epi8(v_r1, v_zero),
                            _mm_unpacklo_epi8(v_g1, v_zero),
                            _mm_unpacklo_epi8(v_b1, v_zero),
                            buf + j + 48);

                    process(_mm_unpackhi_epi8(v_r1, v_zero),
                            _mm_unpackhi_epi8(v_g1, v_zero),
                            _mm_unpackhi_epi8(v_b1, v_zero),
                            buf + j + 72);
                }
            }
#endif

            // Scalar tail of stage 1.
            for( ; j < dn*3; j += 3 )
            {
                buf[j] = src[j];
                buf[j+1] = src[j+1]*(1.f/255.f);
                buf[j+2] = src[j+2]*(1.f/255.f);
            }
            // Stage 2: float HSV -> float BGR, in place.
            cvt(buf, buf, dn);

            // Stage 3: float BGR in [0,1] -> uchar BGR(A), scaled by 255.
            j = 0;
#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
                uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));

                if (dcn == 4)
                {
                    uint8x8x4_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    v_dst.val[3] = v_alpha;
                    vst4_u8(dst, v_dst);
                }
                else
                {
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    vst3_u8(dst, v_dst);
                }
            }
#elif CV_SSE2
            // SSE2 path only when no alpha needs interleaving (dcn == 3).
            if (dcn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
                {
                    __m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
                    __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
                    __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
                    __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);

                    __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
                                                     _mm_cvtps_epi32(v_src1));
                    __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
                                                     _mm_cvtps_epi32(v_src3));

                    _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
                }

                // Back up to a whole-pixel (multiple of 3) boundary so the
                // scalar tail restarts on a triplet.
                int jr = j % 3;
                if (jr)
                    dst -= jr, j -= jr;
            }
#endif

            // Scalar tail of stage 3.
            for( ; j < dn*3; j += 3, dst += dcn )
            {
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
                if( dcn == 4 )
                    dst[3] = alpha;
            }
        }
    }

    int dstcn;
    HSV2RGB_f cvt;
#if CV_NEON
    float32x4_t v_scale, v_scale_inv;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale_inv, v_scale;
    __m128i v_zero;
    bool haveSIMD;
#endif
};
4500
4501
4502 ///////////////////////////////////// RGB <-> HLS ////////////////////////////////////////
4503
struct RGB2HLS_f
{
    typedef float channel_type;

    // Converts packed float RGB pixels to HLS (hue, lightness, saturation).
    // _blueIdx selects BGR (0) vs RGB (2) input order; _hrange is the full
    // hue range (e.g. 360 for floats).
    RGB2HLS_f(int _srccn, int _blueIdx, float _hrange)
    : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {}

    void operator()(const float* src, float* dst, int n) const
    {
        const int bidx = blueIdx, scn = srccn;
        const float hscale = hrange*(1.f/360.f);
        n *= 3;

        for( int i = 0; i < n; i += 3, src += scn )
        {
            const float b = src[bidx], g = src[1], r = src[bidx^2];

            // Channel extrema; comparison order matches the reference code.
            float vmax = r, vmin = r;
            if( vmax < g ) vmax = g;
            if( vmax < b ) vmax = b;
            if( vmin > g ) vmin = g;
            if( vmin > b ) vmin = b;

            const float diff = vmax - vmin;
            const float l = (vmax + vmin)*0.5f;
            float h = 0.f, s = 0.f;

            // Zero chroma means a grey pixel: hue and saturation stay 0.
            if( diff > FLT_EPSILON )
            {
                s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
                const float k = 60.f/diff;

                if( vmax == r )
                    h = (g - b)*k;
                else if( vmax == g )
                    h = (b - r)*k + 120.f;
                else
                    h = (r - g)*k + 240.f;

                if( h < 0.f )
                    h += 360.f;
            }

            dst[i] = h*hscale;
            dst[i+1] = l;
            dst[i+2] = s;
        }
    }

    int srccn, blueIdx;
    float hrange;
};
4556
4557
struct RGB2HLS_b
{
    typedef uchar channel_type;

    // 8-bit RGB -> HLS. Works in BLOCK_SIZE chunks: normalizes pixels to
    // float [0,1], delegates to the float converter (cvt), then packs the
    // HLS result back to uchar (H as-is, L and S scaled by 255).
    RGB2HLS_b(int _srccn, int _blueIdx, int _hrange)
    : srccn(_srccn), cvt(3, _blueIdx, (float)_hrange)
    {
#if CV_NEON
        // Constants for the vector widen/narrow paths.
        v_scale_inv = vdupq_n_f32(1.f/255.f);
        v_scale = vdupq_n_f32(255.f);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
#elif CV_SSE2
        v_scale_inv = _mm_set1_ps(1.f/255.f);
        v_scale = _mm_set1_ps(255.f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // Loads 8 float HLS triplets from buf, de-interleaves them, scales L
    // and S by 255 (H is already in the target hue range) and packs each
    // plane to 8 signed 16-bit lanes.
    void process(const float * buf,
                 __m128i & v_h, __m128i & v_l, __m128i & v_s) const
    {
        __m128 v_h0f = _mm_load_ps(buf);
        __m128 v_h1f = _mm_load_ps(buf + 4);
        __m128 v_l0f = _mm_load_ps(buf + 8);
        __m128 v_l1f = _mm_load_ps(buf + 12);
        __m128 v_s0f = _mm_load_ps(buf + 16);
        __m128 v_s1f = _mm_load_ps(buf + 20);

        _mm_deinterleave_ps(v_h0f, v_h1f, v_l0f, v_l1f, v_s0f, v_s1f);

        v_l0f = _mm_mul_ps(v_l0f, v_scale);
        v_l1f = _mm_mul_ps(v_l1f, v_scale);
        v_s0f = _mm_mul_ps(v_s0f, v_scale);
        v_s1f = _mm_mul_ps(v_s1f, v_scale);

        v_h = _mm_packs_epi32(_mm_cvtps_epi32(v_h0f), _mm_cvtps_epi32(v_h1f));
        v_l = _mm_packs_epi32(_mm_cvtps_epi32(v_l0f), _mm_cvtps_epi32(v_l1f));
        v_s = _mm_packs_epi32(_mm_cvtps_epi32(v_s0f), _mm_cvtps_epi32(v_s1f));
    }
#endif

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, scn = srccn;
        // Aligned scratch holding one block of float triplets.
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

            // Stage 1: uchar RGB(A) -> normalized float triplets in buf.
#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24, src += 8 * scn)
            {
                uint16x8_t v_t0, v_t1, v_t2;

                if (scn == 3)
                {
                    uint8x8x3_t v_src = vld3_u8(src);
                    v_t0 = vmovl_u8(v_src.val[0]);
                    v_t1 = vmovl_u8(v_src.val[1]);
                    v_t2 = vmovl_u8(v_src.val[2]);
                }
                else
                {
                    // 4-channel input: alpha is loaded but discarded.
                    uint8x8x4_t v_src = vld4_u8(src);
                    v_t0 = vmovl_u8(v_src.val[0]);
                    v_t1 = vmovl_u8(v_src.val[1]);
                    v_t2 = vmovl_u8(v_src.val[2]);
                }

                float32x4x3_t v_dst;
                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            // SSE2 path only for packed 3-channel input.
            if (scn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, src += 16)
                {
                    __m128i v_src = _mm_loadu_si128((__m128i const *)src);

                    __m128i v_src_p = _mm_unpacklo_epi8(v_src, v_zero);
                    _mm_store_ps(buf + j, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv));
                    _mm_store_ps(buf + j + 4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv));

                    v_src_p = _mm_unpackhi_epi8(v_src, v_zero);
                    _mm_store_ps(buf + j + 8, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv));
                    _mm_store_ps(buf + j + 12, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv));
                }

                // Back up to a whole-pixel (multiple of 3) boundary so the
                // scalar tail restarts on a triplet.
                int jr = j % 3;
                if (jr)
                    src -= jr, j -= jr;
            }
#endif
            // Scalar tail of stage 1.
            for( ; j < dn*3; j += 3, src += scn )
            {
                buf[j] = src[0]*(1.f/255.f);
                buf[j+1] = src[1]*(1.f/255.f);
                buf[j+2] = src[2]*(1.f/255.f);
            }
            // Stage 2: float RGB -> float HLS, in place.
            cvt(buf, buf, dn);

            // Stage 3: float HLS -> uchar (H as-is, L and S scaled by 255).
            j = 0;
#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);

                uint8x8x3_t v_dst;
                v_dst.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(v_src0.val[0])),
                                                       vqmovn_u32(cv_vrndq_u32_f32(v_src1.val[0]))));
                v_dst.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                       vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                v_dst.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                       vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
                vst3_u8(dst + j, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // 32 pixels (96 bytes) per iteration.
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_h_0, v_l_0, v_s_0;
                    process(buf + j,
                            v_h_0, v_l_0, v_s_0);

                    __m128i v_h_1, v_l_1, v_s_1;
                    process(buf + j + 24,
                            v_h_1, v_l_1, v_s_1);

                    __m128i v_h0 = _mm_packus_epi16(v_h_0, v_h_1);
                    __m128i v_l0 = _mm_packus_epi16(v_l_0, v_l_1);
                    __m128i v_s0 = _mm_packus_epi16(v_s_0, v_s_1);

                    process(buf + j + 48,
                            v_h_0, v_l_0, v_s_0);

                    process(buf + j + 72,
                            v_h_1, v_l_1, v_s_1);

                    __m128i v_h1 = _mm_packus_epi16(v_h_0, v_h_1);
                    __m128i v_l1 = _mm_packus_epi16(v_l_0, v_l_1);
                    __m128i v_s1 = _mm_packus_epi16(v_s_0, v_s_1);

                    // Planes back to packed HLS byte triplets.
                    _mm_interleave_epi8(v_h0, v_h1, v_l0, v_l1, v_s0, v_s1);

                    _mm_storeu_si128((__m128i *)(dst + j), v_h0);
                    _mm_storeu_si128((__m128i *)(dst + j + 16), v_h1);
                    _mm_storeu_si128((__m128i *)(dst + j + 32), v_l0);
                    _mm_storeu_si128((__m128i *)(dst + j + 48), v_l1);
                    _mm_storeu_si128((__m128i *)(dst + j + 64), v_s0);
                    _mm_storeu_si128((__m128i *)(dst + j + 80), v_s1);
                }
            }
#endif
            // Scalar tail of stage 3.
            for( ; j < dn*3; j += 3 )
            {
                dst[j] = saturate_cast<uchar>(buf[j]);
                dst[j+1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[j+2] = saturate_cast<uchar>(buf[j+2]*255.f);
            }
        }
    }

    int srccn;
    RGB2HLS_f cvt;
#if CV_NEON
    float32x4_t v_scale, v_scale_inv;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv;
    __m128i v_zero;
    bool haveSIMD;
#endif
};
4744
4745
4746 struct HLS2RGB_f
4747 {
4748 typedef float channel_type;
4749
HLS2RGB_fcv::HLS2RGB_f4750 HLS2RGB_f(int _dstcn, int _blueIdx, float _hrange)
4751 : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}
4752
operator ()cv::HLS2RGB_f4753 void operator()(const float* src, float* dst, int n) const
4754 {
4755 int i, bidx = blueIdx, dcn = dstcn;
4756 float _hscale = hscale;
4757 float alpha = ColorChannel<float>::max();
4758 n *= 3;
4759
4760 for( i = 0; i < n; i += 3, dst += dcn )
4761 {
4762 float h = src[i], l = src[i+1], s = src[i+2];
4763 float b, g, r;
4764
4765 if( s == 0 )
4766 b = g = r = l;
4767 else
4768 {
4769 static const int sector_data[][3]=
4770 {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
4771 float tab[4];
4772 int sector;
4773
4774 float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
4775 float p1 = 2*l - p2;
4776
4777 h *= _hscale;
4778 if( h < 0 )
4779 do h += 6; while( h < 0 );
4780 else if( h >= 6 )
4781 do h -= 6; while( h >= 6 );
4782
4783 assert( 0 <= h && h < 6 );
4784 sector = cvFloor(h);
4785 h -= sector;
4786
4787 tab[0] = p2;
4788 tab[1] = p1;
4789 tab[2] = p1 + (p2 - p1)*(1-h);
4790 tab[3] = p1 + (p2 - p1)*h;
4791
4792 b = tab[sector_data[sector][0]];
4793 g = tab[sector_data[sector][1]];
4794 r = tab[sector_data[sector][2]];
4795 }
4796
4797 dst[bidx] = b;
4798 dst[1] = g;
4799 dst[bidx^2] = r;
4800 if( dcn == 4 )
4801 dst[3] = alpha;
4802 }
4803 }
4804
4805 int dstcn, blueIdx;
4806 float hscale;
4807 };
4808
4809
4810 struct HLS2RGB_b
4811 {
4812 typedef uchar channel_type;
4813
HLS2RGB_bcv::HLS2RGB_b4814 HLS2RGB_b(int _dstcn, int _blueIdx, int _hrange)
4815 : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
4816 {
4817 #if CV_NEON
4818 v_scale_inv = vdupq_n_f32(1.f/255.f);
4819 v_scale = vdupq_n_f32(255.f);
4820 v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
4821 #elif CV_SSE2
4822 v_scale_inv = _mm_set1_ps(1.f/255.f);
4823 v_scale = _mm_set1_ps(255.f);
4824 v_zero = _mm_setzero_si128();
4825 haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
4826 #endif
4827 }
4828
4829 #if CV_SSE2
4830 // 16s x 8
processcv::HLS2RGB_b4831 void process(__m128i v_r, __m128i v_g, __m128i v_b,
4832 float * buf) const
4833 {
4834 __m128 v_r0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_r, v_zero));
4835 __m128 v_g0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_g, v_zero));
4836 __m128 v_b0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_b, v_zero));
4837
4838 __m128 v_r1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_r, v_zero));
4839 __m128 v_g1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_g, v_zero));
4840 __m128 v_b1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_b, v_zero));
4841
4842 v_g0 = _mm_mul_ps(v_g0, v_scale_inv);
4843 v_b0 = _mm_mul_ps(v_b0, v_scale_inv);
4844
4845 v_g1 = _mm_mul_ps(v_g1, v_scale_inv);
4846 v_b1 = _mm_mul_ps(v_b1, v_scale_inv);
4847
4848 _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);
4849
4850 _mm_store_ps(buf, v_r0);
4851 _mm_store_ps(buf + 4, v_r1);
4852 _mm_store_ps(buf + 8, v_g0);
4853 _mm_store_ps(buf + 12, v_g1);
4854 _mm_store_ps(buf + 16, v_b0);
4855 _mm_store_ps(buf + 20, v_b1);
4856 }
4857 #endif
4858
operator ()cv::HLS2RGB_b4859 void operator()(const uchar* src, uchar* dst, int n) const
4860 {
4861 int i, j, dcn = dstcn;
4862 uchar alpha = ColorChannel<uchar>::max();
4863 float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];
4864
4865 for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
4866 {
4867 int dn = std::min(n - i, (int)BLOCK_SIZE);
4868 j = 0;
4869
4870 #if CV_NEON
4871 for ( ; j <= (dn - 8) * 3; j += 24)
4872 {
4873 uint8x8x3_t v_src = vld3_u8(src + j);
4874 uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
4875 v_t1 = vmovl_u8(v_src.val[1]),
4876 v_t2 = vmovl_u8(v_src.val[2]);
4877
4878 float32x4x3_t v_dst;
4879 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0)));
4880 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
4881 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
4882 vst3q_f32(buf + j, v_dst);
4883
4884 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0)));
4885 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
4886 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
4887 vst3q_f32(buf + j + 12, v_dst);
4888 }
4889 #elif CV_SSE2
4890 if (haveSIMD)
4891 {
4892 for ( ; j <= (dn - 32) * 3; j += 96)
4893 {
4894 __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j));
4895 __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16));
4896 __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32));
4897 __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48));
4898 __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + j + 64));
4899 __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80));
4900
4901 _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);
4902
4903 process(_mm_unpacklo_epi8(v_r0, v_zero),
4904 _mm_unpacklo_epi8(v_g0, v_zero),
4905 _mm_unpacklo_epi8(v_b0, v_zero),
4906 buf + j);
4907
4908 process(_mm_unpackhi_epi8(v_r0, v_zero),
4909 _mm_unpackhi_epi8(v_g0, v_zero),
4910 _mm_unpackhi_epi8(v_b0, v_zero),
4911 buf + j + 24);
4912
4913 process(_mm_unpacklo_epi8(v_r1, v_zero),
4914 _mm_unpacklo_epi8(v_g1, v_zero),
4915 _mm_unpacklo_epi8(v_b1, v_zero),
4916 buf + j + 48);
4917
4918 process(_mm_unpackhi_epi8(v_r1, v_zero),
4919 _mm_unpackhi_epi8(v_g1, v_zero),
4920 _mm_unpackhi_epi8(v_b1, v_zero),
4921 buf + j + 72);
4922 }
4923 }
4924 #endif
4925 for( ; j < dn*3; j += 3 )
4926 {
4927 buf[j] = src[j];
4928 buf[j+1] = src[j+1]*(1.f/255.f);
4929 buf[j+2] = src[j+2]*(1.f/255.f);
4930 }
4931 cvt(buf, buf, dn);
4932
4933 j = 0;
4934 #if CV_NEON
4935 for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
4936 {
4937 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
4938 uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
4939 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
4940 uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
4941 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
4942 uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
4943 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
4944
4945 if (dcn == 4)
4946 {
4947 uint8x8x4_t v_dst;
4948 v_dst.val[0] = v_dst0;
4949 v_dst.val[1] = v_dst1;
4950 v_dst.val[2] = v_dst2;
4951 v_dst.val[3] = v_alpha;
4952 vst4_u8(dst, v_dst);
4953 }
4954 else
4955 {
4956 uint8x8x3_t v_dst;
4957 v_dst.val[0] = v_dst0;
4958 v_dst.val[1] = v_dst1;
4959 v_dst.val[2] = v_dst2;
4960 vst3_u8(dst, v_dst);
4961 }
4962 }
4963 #elif CV_SSE2
4964 if (dcn == 3 && haveSIMD)
4965 {
4966 for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
4967 {
4968 __m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
4969 __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
4970 __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
4971 __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);
4972
4973 __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
4974 _mm_cvtps_epi32(v_src1));
4975 __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
4976 _mm_cvtps_epi32(v_src3));
4977
4978 _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
4979 }
4980
4981 int jr = j % 3;
4982 if (jr)
4983 dst -= jr, j -= jr;
4984 }
4985 #endif
4986
4987 for( ; j < dn*3; j += 3, dst += dcn )
4988 {
4989 dst[0] = saturate_cast<uchar>(buf[j]*255.f);
4990 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
4991 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
4992 if( dcn == 4 )
4993 dst[3] = alpha;
4994 }
4995 }
4996 }
4997
4998 int dstcn;
4999 HLS2RGB_f cvt;
5000 #if CV_NEON
5001 float32x4_t v_scale, v_scale_inv;
5002 uint8x8_t v_alpha;
5003 #elif CV_SSE2
5004 __m128 v_scale, v_scale_inv;
5005 __m128i v_zero;
5006 bool haveSIMD;
5007 #endif
5008 };
5009
5010
///////////////////////////////////// RGB <-> L*a*b* /////////////////////////////////////

// D65 reference white point in XYZ, normalized so that Y == 1.
static const float D65[] = { 0.950456f, 1.f, 1.088754f };

enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 };
// Spline table for the Lab "f" (cube-root-like) function; 4 coefficients per knot.
static float LabCbrtTab[LAB_CBRT_TAB_SIZE*4];
static const float LabCbrtTabScale = LAB_CBRT_TAB_SIZE/1.5f;

// Spline tables for the forward and inverse sRGB gamma transfer curves.
static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4];
static const float GammaTabScale = (float)GAMMA_TAB_SIZE;

// Fixed-point gamma tables for 8-bit inputs (values scaled by 1 << gamma_shift).
static ushort sRGBGammaTab_b[256], linearGammaTab_b[256];
#undef lab_shift
#define lab_shift xyz_shift
#define gamma_shift 3
#define lab_shift2 (lab_shift + gamma_shift)
#define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift))
// Fixed-point Lab "f" function table for the 8-bit conversion path.
static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B];
5029
// Lazily fills the Lab/gamma lookup tables declared above on first call.
// NOTE(review): `initialized` is a plain static bool with no synchronization;
// concurrent first calls could race — confirm callers serialize the first
// invocation (or that init happens before threads are spawned).
static void initLabTabs()
{
    static bool initialized = false;
    if(!initialized)
    {
        float f[LAB_CBRT_TAB_SIZE+1], g[GAMMA_TAB_SIZE+1], ig[GAMMA_TAB_SIZE+1], scale = 1.f/LabCbrtTabScale;
        int i;
        // Lab f(t): linear segment below the 0.008856 threshold, cube root above.
        for(i = 0; i <= LAB_CBRT_TAB_SIZE; i++)
        {
            float x = i*scale;
            f[i] = x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x);
        }
        splineBuild(f, LAB_CBRT_TAB_SIZE, LabCbrtTab);

        scale = 1.f/GammaTabScale;
        // sRGB gamma (g) and inverse gamma (ig) transfer curves.
        for(i = 0; i <= GAMMA_TAB_SIZE; i++)
        {
            float x = i*scale;
            g[i] = x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4);
            ig[i] = x <= 0.0031308 ? x*12.92f : (float)(1.055*std::pow((double)x, 1./2.4) - 0.055);
        }
        splineBuild(g, GAMMA_TAB_SIZE, sRGBGammaTab);
        splineBuild(ig, GAMMA_TAB_SIZE, sRGBInvGammaTab);

        // Fixed-point 8-bit gamma tables, values scaled by 1 << gamma_shift.
        for(i = 0; i < 256; i++)
        {
            float x = i*(1.f/255.f);
            sRGBGammaTab_b[i] = saturate_cast<ushort>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4)));
            linearGammaTab_b[i] = (ushort)(i*(1 << gamma_shift));
        }

        // Fixed-point Lab f(t) table, values scaled by 1 << lab_shift2.
        for(i = 0; i < LAB_CBRT_TAB_SIZE_B; i++)
        {
            float x = i*(1.f/(255.f*(1 << gamma_shift)));
            LabCbrtTab_b[i] = saturate_cast<ushort>((1 << lab_shift2)*(x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x)));
        }
        initialized = true;
    }
}
5069
5070 struct RGB2Lab_b
5071 {
5072 typedef uchar channel_type;
5073
RGB2Lab_bcv::RGB2Lab_b5074 RGB2Lab_b(int _srccn, int blueIdx, const float* _coeffs,
5075 const float* _whitept, bool _srgb)
5076 : srccn(_srccn), srgb(_srgb)
5077 {
5078 static volatile int _3 = 3;
5079 initLabTabs();
5080
5081 if (!_coeffs)
5082 _coeffs = sRGB2XYZ_D65;
5083 if (!_whitept)
5084 _whitept = D65;
5085
5086 float scale[] =
5087 {
5088 (1 << lab_shift)/_whitept[0],
5089 (float)(1 << lab_shift),
5090 (1 << lab_shift)/_whitept[2]
5091 };
5092
5093 for( int i = 0; i < _3; i++ )
5094 {
5095 coeffs[i*3+(blueIdx^2)] = cvRound(_coeffs[i*3]*scale[i]);
5096 coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]);
5097 coeffs[i*3+blueIdx] = cvRound(_coeffs[i*3+2]*scale[i]);
5098
5099 CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
5100 coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) );
5101 }
5102 }
5103
operator ()cv::RGB2Lab_b5104 void operator()(const uchar* src, uchar* dst, int n) const
5105 {
5106 const int Lscale = (116*255+50)/100;
5107 const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100);
5108 const ushort* tab = srgb ? sRGBGammaTab_b : linearGammaTab_b;
5109 int i, scn = srccn;
5110 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
5111 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
5112 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
5113 n *= 3;
5114
5115 for( i = 0; i < n; i += 3, src += scn )
5116 {
5117 int R = tab[src[0]], G = tab[src[1]], B = tab[src[2]];
5118 int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)];
5119 int fY = LabCbrtTab_b[CV_DESCALE(R*C3 + G*C4 + B*C5, lab_shift)];
5120 int fZ = LabCbrtTab_b[CV_DESCALE(R*C6 + G*C7 + B*C8, lab_shift)];
5121
5122 int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );
5123 int a = CV_DESCALE( 500*(fX - fY) + 128*(1 << lab_shift2), lab_shift2 );
5124 int b = CV_DESCALE( 200*(fY - fZ) + 128*(1 << lab_shift2), lab_shift2 );
5125
5126 dst[i] = saturate_cast<uchar>(L);
5127 dst[i+1] = saturate_cast<uchar>(a);
5128 dst[i+2] = saturate_cast<uchar>(b);
5129 }
5130 }
5131
5132 int srccn;
5133 int coeffs[9];
5134 bool srgb;
5135 };
5136
5137
// Clamp a float expression to [0, 1].
// Fixed: the macro argument is now fully parenthesized (the previous form
// expanded `value` unparenthesized, breaking for compound arguments and
// evaluating it up to three times without protection), and the stray
// trailing semicolon is gone so clip() can be used inside larger expressions.
// Note: the argument is still evaluated more than once; do not pass
// expressions with side effects.
#define clip(value) \
    ((value) < 0.0f ? 0.0f : (value) > 1.0f ? 1.0f : (value))
5140
// Converts floating-point RGB/BGR pixels (expected in [0, 1]) to CIE L*a*b*.
struct RGB2Lab_f
{
    typedef float channel_type;

    RGB2Lab_f(int _srccn, int blueIdx, const float* _coeffs,
              const float* _whitept, bool _srgb)
    : srccn(_srccn), srgb(_srgb)
    {
        // volatile loop bound — presumably kept to stop the compiler from
        // constant-folding/over-optimizing the loop below; left as-is.
        volatile int _3 = 3;
        initLabTabs();

        // Defaults: sRGB -> XYZ matrix with the D65 white point.
        if (!_coeffs)
            _coeffs = sRGB2XYZ_D65;
        if (!_whitept)
            _whitept = D65;

        // White-point normalization for the X and Z rows.
        float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] };

        for( int i = 0; i < _3; i++ )
        {
            int j = i * 3;
            // Swap R/B columns according to blueIdx.
            coeffs[j + (blueIdx ^ 2)] = _coeffs[j] * scale[i];
            coeffs[j + 1] = _coeffs[j + 1] * scale[i];
            coeffs[j + blueIdx] = _coeffs[j + 2] * scale[i];

            CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 &&
                       coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*LabCbrtTabScale );
        }
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int i, scn = srccn;
        float gscale = GammaTabScale;
        // Gamma correction only applies in sRGB mode.
        const float* gammaTab = srgb ? sRGBGammaTab : 0;
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;

        static const float _1_3 = 1.0f / 3.0f;
        static const float _a = 16.0f / 116.0f;
        for (i = 0; i < n; i += 3, src += scn )
        {
            // Clamp inputs to [0, 1] before any table lookups.
            float R = clip(src[0]);
            float G = clip(src[1]);
            float B = clip(src[2]);

            if (gammaTab)
            {
                R = splineInterpolate(R * gscale, gammaTab, GAMMA_TAB_SIZE);
                G = splineInterpolate(G * gscale, gammaTab, GAMMA_TAB_SIZE);
                B = splineInterpolate(B * gscale, gammaTab, GAMMA_TAB_SIZE);
            }
            // Linear RGB -> white-point-normalized XYZ.
            float X = R*C0 + G*C1 + B*C2;
            float Y = R*C3 + G*C4 + B*C5;
            float Z = R*C6 + G*C7 + B*C8;

            // Lab f(): cube root above the 0.008856 threshold, linear below.
            float FX = X > 0.008856f ? std::pow(X, _1_3) : (7.787f * X + _a);
            float FY = Y > 0.008856f ? std::pow(Y, _1_3) : (7.787f * Y + _a);
            float FZ = Z > 0.008856f ? std::pow(Z, _1_3) : (7.787f * Z + _a);

            float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y);
            float a = 500.f * (FX - FY);
            float b = 200.f * (FY - FZ);

            dst[i] = L;
            dst[i + 1] = a;
            dst[i + 2] = b;
        }
    }

    int srccn;
    float coeffs[9];   // white-point-normalized RGB->XYZ matrix
    bool srgb;
};
5217
// Converts floating-point CIE L*a*b* pixels back to RGB/BGR in [0, 1].
struct Lab2RGB_f
{
    typedef float channel_type;

    Lab2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
               const float* _whitept, bool _srgb )
    : dstcn(_dstcn), srgb(_srgb)
    {
        initLabTabs();

        // Defaults: XYZ -> sRGB matrix with the D65 white point.
        if(!_coeffs)
            _coeffs = XYZ2sRGB_D65;
        if(!_whitept)
            _whitept = D65;

        // Fold the white point into the matrix and swap the R/B rows
        // according to blueIdx.
        for( int i = 0; i < 3; i++ )
        {
            coeffs[i+(blueIdx^2)*3] = _coeffs[i]*_whitept[i];
            coeffs[i+3] = _coeffs[i+3]*_whitept[i];
            coeffs[i+blueIdx*3] = _coeffs[i+6]*_whitept[i];
        }
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int i, dcn = dstcn;
        // Inverse gamma applies only in sRGB mode.
        const float* gammaTab = srgb ? sRGBInvGammaTab : 0;
        float gscale = GammaTabScale;
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        float alpha = ColorChannel<float>::max();
        n *= 3;

        // Thresholds of the piecewise inverse of the Lab f() function:
        // lThresh in L-space, fThresh in f-space.
        static const float lThresh = 0.008856f * 903.3f;
        static const float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
        for (i = 0; i < n; i += 3, dst += dcn)
        {
            float li = src[i];
            float ai = src[i + 1];
            float bi = src[i + 2];

            // Invert L -> Y (and keep f(Y) for the a/b inversion below).
            float y, fy;
            if (li <= lThresh)
            {
                y = li / 903.3f;
                fy = 7.787f * y + 16.0f / 116.0f;
            }
            else
            {
                fy = (li + 16.0f) / 116.0f;
                y = fy * fy * fy;
            }

            // Recover f(X) and f(Z) from a and b, then invert piecewise.
            float fxz[] = { ai / 500.0f + fy, fy - bi / 200.0f };

            for (int j = 0; j < 2; j++)
                if (fxz[j] <= fThresh)
                    fxz[j] = (fxz[j] - 16.0f / 116.0f) / 7.787f;
                else
                    fxz[j] = fxz[j] * fxz[j] * fxz[j];


            // XYZ -> linear RGB, clamp, then optional inverse gamma.
            float x = fxz[0], z = fxz[1];
            float ro = C0 * x + C1 * y + C2 * z;
            float go = C3 * x + C4 * y + C5 * z;
            float bo = C6 * x + C7 * y + C8 * z;
            ro = clip(ro);
            go = clip(go);
            bo = clip(bo);

            if (gammaTab)
            {
                ro = splineInterpolate(ro * gscale, gammaTab, GAMMA_TAB_SIZE);
                go = splineInterpolate(go * gscale, gammaTab, GAMMA_TAB_SIZE);
                bo = splineInterpolate(bo * gscale, gammaTab, GAMMA_TAB_SIZE);
            }

            dst[0] = ro, dst[1] = go, dst[2] = bo;
            if( dcn == 4 )
                dst[3] = alpha;   // opaque alpha for 4-channel output
        }
    }

    int dstcn;
    float coeffs[9];   // white-point-scaled XYZ->RGB matrix
    bool srgb;
};
5306
5307 #undef clip
5308
// Converts 8-bit L*a*b* pixels to 8-bit RGB/BGR.
// Per block of up to BLOCK_SIZE pixels: undo the 8-bit Lab packing
// (L: *100/255 back to [0, 100]; a, b: subtract the +128 bias), run the
// float converter `cvt`, then scale by 255 and saturate back to uchar.
struct Lab2RGB_b
{
    typedef uchar channel_type;

    Lab2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
               const float* _whitept, bool _srgb )
    : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
    {
#if CV_NEON
        // Broadcast constants used by the NEON paths below.
        v_scale_inv = vdupq_n_f32(100.f/255.f);
        v_scale = vdupq_n_f32(255.f);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
        v_128 = vdupq_n_f32(128.0f);
#elif CV_SSE2
        v_scale_inv = _mm_set1_ps(100.f/255.f);
        v_scale = _mm_set1_ps(255.f);
        v_128 = _mm_set1_ps(128.0f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // 16s x 8
    // Widens 8 u16 values per channel to float, rescales L to [0, 100] and
    // removes the +128 bias from a/b, interleaves the three channels and
    // stores 24 consecutive floats at buf.
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 float * buf) const
    {
        __m128 v_r0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_r, v_zero));
        __m128 v_g0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_g, v_zero));
        __m128 v_b0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_b, v_zero));

        __m128 v_r1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_r, v_zero));
        __m128 v_g1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_g, v_zero));
        __m128 v_b1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_b, v_zero));

        // L channel: back to [0, 100].
        v_r0 = _mm_mul_ps(v_r0, v_scale_inv);
        v_r1 = _mm_mul_ps(v_r1, v_scale_inv);

        // a/b channels: remove the +128 storage bias.
        v_g0 = _mm_sub_ps(v_g0, v_128);
        v_g1 = _mm_sub_ps(v_g1, v_128);
        v_b0 = _mm_sub_ps(v_b0, v_128);
        v_b1 = _mm_sub_ps(v_b1, v_128);

        _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

        _mm_store_ps(buf, v_r0);
        _mm_store_ps(buf + 4, v_r1);
        _mm_store_ps(buf + 8, v_g0);
        _mm_store_ps(buf + 12, v_g1);
        _mm_store_ps(buf + 16, v_b0);
        _mm_store_ps(buf + 20, v_b1);
    }
#endif

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, dcn = dstcn;
        uchar alpha = ColorChannel<uchar>::max();
        // Scratch buffer for one block of float Lab/RGB triples.
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

            // --- Stage 1: uchar Lab -> float Lab ---
#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                uint8x8x3_t v_src = vld3_u8(src + j);
                uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
                           v_t1 = vmovl_u8(v_src.val[1]),
                           v_t2 = vmovl_u8(v_src.val[2]);

                float32x4x3_t v_dst;
                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_128);
                v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_128);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_128);
                v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_128);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // Process 32 pixels (96 interleaved bytes) at a time.
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j));
                    __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16));
                    __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32));
                    __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48));
                    __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + j + 64));
                    __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80));

                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                    process(_mm_unpacklo_epi8(v_r0, v_zero),
                            _mm_unpacklo_epi8(v_g0, v_zero),
                            _mm_unpacklo_epi8(v_b0, v_zero),
                            buf + j);

                    process(_mm_unpackhi_epi8(v_r0, v_zero),
                            _mm_unpackhi_epi8(v_g0, v_zero),
                            _mm_unpackhi_epi8(v_b0, v_zero),
                            buf + j + 24);

                    process(_mm_unpacklo_epi8(v_r1, v_zero),
                            _mm_unpacklo_epi8(v_g1, v_zero),
                            _mm_unpacklo_epi8(v_b1, v_zero),
                            buf + j + 48);

                    process(_mm_unpackhi_epi8(v_r1, v_zero),
                            _mm_unpackhi_epi8(v_g1, v_zero),
                            _mm_unpackhi_epi8(v_b1, v_zero),
                            buf + j + 72);
                }
            }
#endif

            // Scalar tail: L to [0, 100], a/b de-biased.
            for( ; j < dn*3; j += 3 )
            {
                buf[j] = src[j]*(100.f/255.f);
                buf[j+1] = (float)(src[j+1] - 128);
                buf[j+2] = (float)(src[j+2] - 128);
            }
            // --- Stage 2: float Lab -> float RGB in place ---
            cvt(buf, buf, dn);
            j = 0;

            // --- Stage 3: float [0, 1] -> uchar, scale by 255 with saturation ---
#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
                uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));

                if (dcn == 4)
                {
                    uint8x8x4_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    v_dst.val[3] = v_alpha;
                    vst4_u8(dst, v_dst);
                }
                else
                {
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    vst3_u8(dst, v_dst);
                }
            }
#elif CV_SSE2
            if (dcn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
                {
                    __m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
                    __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
                    __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
                    __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);

                    __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
                                                     _mm_cvtps_epi32(v_src1));
                    __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
                                                     _mm_cvtps_epi32(v_src3));

                    _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
                }

                // Re-align j (and dst) to a multiple of 3 so the scalar tail
                // starts on a pixel boundary.
                int jr = j % 3;
                if (jr)
                    dst -= jr, j -= jr;
            }
#endif

            for( ; j < dn*3; j += 3, dst += dcn )
            {
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
                if( dcn == 4 )
                    dst[3] = alpha;
            }
        }
    }

    int dstcn;
    Lab2RGB_f cvt;   // float converter reused for the core math

#if CV_NEON
    float32x4_t v_scale, v_scale_inv, v_128;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv, v_128;
    __m128i v_zero;
    bool haveSIMD;
#endif
};
5515
5516
5517 ///////////////////////////////////// RGB <-> L*u*v* /////////////////////////////////////
5518
// Converts floating-point RGB/BGR pixels to CIE L*u*v*.
struct RGB2Luv_f
{
    typedef float channel_type;

    RGB2Luv_f( int _srccn, int blueIdx, const float* _coeffs,
               const float* whitept, bool _srgb )
    : srccn(_srccn), srgb(_srgb)
    {
        // volatile loop counter — presumably kept to defeat over-aggressive
        // compiler optimization of the loop below; left as-is.
        volatile int i;
        initLabTabs();

        // Defaults: sRGB -> XYZ matrix with the D65 white point.
        if(!_coeffs) _coeffs = sRGB2XYZ_D65;
        if(!whitept) whitept = D65;

        for( i = 0; i < 3; i++ )
        {
            coeffs[i*3] = _coeffs[i*3];
            coeffs[i*3+1] = _coeffs[i*3+1];
            coeffs[i*3+2] = _coeffs[i*3+2];
            // Swap the R and B matrix columns for BGR input order.
            if( blueIdx == 0 )
                std::swap(coeffs[i*3], coeffs[i*3+2]);
            CV_Assert( coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
                       coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 1.5f );
        }

        // u', v' chromaticity coordinates of the white point.
        float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
        un = 4*whitept[0]*d;
        vn = 9*whitept[1]*d;

        CV_Assert(whitept[1] == 1.f);
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int i, scn = srccn;
        float gscale = GammaTabScale;
        // Gamma correction only applies in sRGB mode.
        const float* gammaTab = srgb ? sRGBGammaTab : 0;
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        // Pre-scale the white-point chromaticities by 13 (Luv definition:
        // u = 13 L (u' - un), v = 13 L (v' - vn)).
        float _un = 13*un, _vn = 13*vn;
        n *= 3;

        for( i = 0; i < n; i += 3, src += scn )
        {
            float R = src[0], G = src[1], B = src[2];
            if( gammaTab )
            {
                R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
                G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
                B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
            }

            float X = R*C0 + G*C1 + B*C2;
            float Y = R*C3 + G*C4 + B*C5;
            float Z = R*C6 + G*C7 + B*C8;

            // L via the tabulated cube-root-like f() function.
            float L = splineInterpolate(Y*LabCbrtTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE);
            L = 116.f*L - 16.f;

            // d folds the factors 4 and 13 into the denominator; epsilon
            // guard avoids division by zero for black pixels.
            float d = (4*13) / std::max(X + 15 * Y + 3 * Z, FLT_EPSILON);
            float u = L*(X*d - _un);
            float v = L*((9*0.25f)*Y*d - _vn);

            dst[i] = L; dst[i+1] = u; dst[i+2] = v;
        }
    }

    int srccn;
    float coeffs[9], un, vn;   // RGB->XYZ matrix and white-point u'n, v'n
    bool srgb;
};
5591
5592
5593 struct Luv2RGB_f
5594 {
5595 typedef float channel_type;
5596
Luv2RGB_fcv::Luv2RGB_f5597 Luv2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
5598 const float* whitept, bool _srgb )
5599 : dstcn(_dstcn), srgb(_srgb)
5600 {
5601 initLabTabs();
5602
5603 if(!_coeffs) _coeffs = XYZ2sRGB_D65;
5604 if(!whitept) whitept = D65;
5605
5606 for( int i = 0; i < 3; i++ )
5607 {
5608 coeffs[i+(blueIdx^2)*3] = _coeffs[i];
5609 coeffs[i+3] = _coeffs[i+3];
5610 coeffs[i+blueIdx*3] = _coeffs[i+6];
5611 }
5612
5613 float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
5614 un = 4*whitept[0]*d;
5615 vn = 9*whitept[1]*d;
5616
5617 CV_Assert(whitept[1] == 1.f);
5618 }
5619
operator ()cv::Luv2RGB_f5620 void operator()(const float* src, float* dst, int n) const
5621 {
5622 int i, dcn = dstcn;
5623 const float* gammaTab = srgb ? sRGBInvGammaTab : 0;
5624 float gscale = GammaTabScale;
5625 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
5626 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
5627 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
5628 float alpha = ColorChannel<float>::max();
5629 float _un = un, _vn = vn;
5630 n *= 3;
5631
5632 for( i = 0; i < n; i += 3, dst += dcn )
5633 {
5634 float L = src[i], u = src[i+1], v = src[i+2], d, X, Y, Z;
5635 Y = (L + 16.f) * (1.f/116.f);
5636 Y = Y*Y*Y;
5637 d = (1.f/13.f)/L;
5638 u = u*d + _un;
5639 v = v*d + _vn;
5640 float iv = 1.f/v;
5641 X = 2.25f * u * Y * iv ;
5642 Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv;
5643
5644 float R = X*C0 + Y*C1 + Z*C2;
5645 float G = X*C3 + Y*C4 + Z*C5;
5646 float B = X*C6 + Y*C7 + Z*C8;
5647
5648 R = std::min(std::max(R, 0.f), 1.f);
5649 G = std::min(std::max(G, 0.f), 1.f);
5650 B = std::min(std::max(B, 0.f), 1.f);
5651
5652 if( gammaTab )
5653 {
5654 R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE);
5655 G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE);
5656 B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE);
5657 }
5658
5659 dst[0] = R; dst[1] = G; dst[2] = B;
5660 if( dcn == 4 )
5661 dst[3] = alpha;
5662 }
5663 }
5664
5665 int dstcn;
5666 float coeffs[9], un, vn;
5667 bool srgb;
5668 };
5669
5670
// Converts 8-bit RGB/BGR pixels to 8-bit L*u*v*.
// Per block of up to BLOCK_SIZE pixels: scale uchar input to [0, 1], run the
// float converter `cvt`, then affinely map the float Luv ranges into
// [0, 255] (L: *2.55; u, v: scale + offset constants below) with saturation.
struct RGB2Luv_b
{
    typedef uchar channel_type;

    RGB2Luv_b( int _srccn, int blueIdx, const float* _coeffs,
               const float* _whitept, bool _srgb )
    : srccn(_srccn), cvt(3, blueIdx, _coeffs, _whitept, _srgb)
    {
#if CV_NEON
        // Broadcast constants used by the NEON paths below.
        // coeff1/coeff2 and coeff3/coeff4 are the u- and v-channel affine
        // maps into [0, 255] (255/354 ~ 0.7203, 255/262 ~ 0.9733).
        v_scale_inv = vdupq_n_f32(1.f/255.f);
        v_scale = vdupq_n_f32(2.55f);
        v_coeff1 = vdupq_n_f32(0.72033898305084743f);
        v_coeff2 = vdupq_n_f32(96.525423728813564f);
        v_coeff3 = vdupq_n_f32(0.9732824427480916f);
        v_coeff4 = vdupq_n_f32(136.259541984732824f);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
#elif CV_SSE2
        v_zero = _mm_setzero_si128();
        v_scale_inv = _mm_set1_ps(1.f/255.f);
        v_scale = _mm_set1_ps(2.55f);
        v_coeff1 = _mm_set1_ps(0.72033898305084743f);
        v_coeff2 = _mm_set1_ps(96.525423728813564f);
        v_coeff3 = _mm_set1_ps(0.9732824427480916f);
        v_coeff4 = _mm_set1_ps(136.259541984732824f);
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // Loads 24 interleaved Luv floats from buf, deinterleaves, applies the
    // per-channel affine maps into [0, 255] and packs each channel to
    // 8 x 16-bit integers.
    void process(const float * buf,
                 __m128i & v_l, __m128i & v_u, __m128i & v_v) const
    {
        __m128 v_l0f = _mm_load_ps(buf);
        __m128 v_l1f = _mm_load_ps(buf + 4);
        __m128 v_u0f = _mm_load_ps(buf + 8);
        __m128 v_u1f = _mm_load_ps(buf + 12);
        __m128 v_v0f = _mm_load_ps(buf + 16);
        __m128 v_v1f = _mm_load_ps(buf + 20);

        _mm_deinterleave_ps(v_l0f, v_l1f, v_u0f, v_u1f, v_v0f, v_v1f);

        v_l0f = _mm_mul_ps(v_l0f, v_scale);
        v_l1f = _mm_mul_ps(v_l1f, v_scale);
        v_u0f = _mm_add_ps(_mm_mul_ps(v_u0f, v_coeff1), v_coeff2);
        v_u1f = _mm_add_ps(_mm_mul_ps(v_u1f, v_coeff1), v_coeff2);
        v_v0f = _mm_add_ps(_mm_mul_ps(v_v0f, v_coeff3), v_coeff4);
        v_v1f = _mm_add_ps(_mm_mul_ps(v_v1f, v_coeff3), v_coeff4);

        v_l = _mm_packs_epi32(_mm_cvtps_epi32(v_l0f), _mm_cvtps_epi32(v_l1f));
        v_u = _mm_packs_epi32(_mm_cvtps_epi32(v_u0f), _mm_cvtps_epi32(v_u1f));
        v_v = _mm_packs_epi32(_mm_cvtps_epi32(v_v0f), _mm_cvtps_epi32(v_v1f));
    }
#endif

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, scn = srccn;
        // Scratch buffer for one block of float RGB/Luv triples.
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

            // --- Stage 1: uchar -> float in [0, 1] ---
#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24, src += 8 * scn)
            {
                uint16x8_t v_t0, v_t1, v_t2;

                // 3- and 4-channel inputs are deinterleaved differently;
                // a 4th (alpha) channel is simply dropped.
                if (scn == 3)
                {
                    uint8x8x3_t v_src = vld3_u8(src);
                    v_t0 = vmovl_u8(v_src.val[0]);
                    v_t1 = vmovl_u8(v_src.val[1]);
                    v_t2 = vmovl_u8(v_src.val[2]);
                }
                else
                {
                    uint8x8x4_t v_src = vld4_u8(src);
                    v_t0 = vmovl_u8(v_src.val[0]);
                    v_t1 = vmovl_u8(v_src.val[1]);
                    v_t2 = vmovl_u8(v_src.val[2]);
                }

                float32x4x3_t v_dst;
                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            // SSE2 fast path only handles 3-channel input; interleaving is
            // irrelevant here since all three channels get the same scale.
            if (scn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, src += 16)
                {
                    __m128i v_src = _mm_loadu_si128((__m128i const *)src);

                    __m128i v_src_p = _mm_unpacklo_epi8(v_src, v_zero);
                    _mm_store_ps(buf + j, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv));
                    _mm_store_ps(buf + j + 4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv));

                    v_src_p = _mm_unpackhi_epi8(v_src, v_zero);
                    _mm_store_ps(buf + j + 8, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv));
                    _mm_store_ps(buf + j + 12, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv));
                }

                // Re-align j (and src) to a multiple of 3 so the scalar tail
                // starts on a pixel boundary.
                int jr = j % 3;
                if (jr)
                    src -= jr, j -= jr;
            }
#endif
            for( ; j < dn*3; j += 3, src += scn )
            {
                buf[j] = src[0]*(1.f/255.f);
                buf[j+1] = (float)(src[1]*(1.f/255.f));
                buf[j+2] = (float)(src[2]*(1.f/255.f));
            }
            // --- Stage 2: float RGB -> float Luv in place ---
            cvt(buf, buf, dn);

            j = 0;
            // --- Stage 3: float Luv -> uchar via per-channel affine map ---
#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);

                uint8x8x3_t v_dst;
                v_dst.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                       vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                v_dst.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[1], v_coeff1), v_coeff2))),
                                                       vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[1], v_coeff1), v_coeff2)))));
                v_dst.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[2], v_coeff3), v_coeff4))),
                                                       vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[2], v_coeff3), v_coeff4)))));

                vst3_u8(dst + j, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // Process 32 pixels at a time: pack four groups of 8, then
                // interleave back to byte-interleaved Luv.
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_l_0, v_u_0, v_v_0;
                    process(buf + j,
                            v_l_0, v_u_0, v_v_0);

                    __m128i v_l_1, v_u_1, v_v_1;
                    process(buf + j + 24,
                            v_l_1, v_u_1, v_v_1);

                    __m128i v_l0 = _mm_packus_epi16(v_l_0, v_l_1);
                    __m128i v_u0 = _mm_packus_epi16(v_u_0, v_u_1);
                    __m128i v_v0 = _mm_packus_epi16(v_v_0, v_v_1);

                    process(buf + j + 48,
                            v_l_0, v_u_0, v_v_0);

                    process(buf + j + 72,
                            v_l_1, v_u_1, v_v_1);

                    __m128i v_l1 = _mm_packus_epi16(v_l_0, v_l_1);
                    __m128i v_u1 = _mm_packus_epi16(v_u_0, v_u_1);
                    __m128i v_v1 = _mm_packus_epi16(v_v_0, v_v_1);

                    _mm_interleave_epi8(v_l0, v_l1, v_u0, v_u1, v_v0, v_v1);

                    _mm_storeu_si128((__m128i *)(dst + j), v_l0);
                    _mm_storeu_si128((__m128i *)(dst + j + 16), v_l1);
                    _mm_storeu_si128((__m128i *)(dst + j + 32), v_u0);
                    _mm_storeu_si128((__m128i *)(dst + j + 48), v_u1);
                    _mm_storeu_si128((__m128i *)(dst + j + 64), v_v0);
                    _mm_storeu_si128((__m128i *)(dst + j + 80), v_v1);
                }
            }
#endif

            // Scalar tail: same affine maps as the SIMD paths.
            for( ; j < dn*3; j += 3 )
            {
                dst[j] = saturate_cast<uchar>(buf[j]*2.55f);
                dst[j+1] = saturate_cast<uchar>(buf[j+1]*0.72033898305084743f + 96.525423728813564f);
                dst[j+2] = saturate_cast<uchar>(buf[j+2]*0.9732824427480916f + 136.259541984732824f);
            }
        }
    }

    int srccn;
    RGB2Luv_f cvt;   // float converter reused for the core math

#if CV_NEON
    float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_coeff3, v_coeff4;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv, v_coeff1, v_coeff2, v_coeff3, v_coeff4;
    __m128i v_zero;
    bool haveSIMD;
#endif
};
5871
5872
// 8-bit Luv -> RGB converter. The uchar input stores L scaled into [0,255]
// and u,v offset/scaled into [0,255]; this functor restores the float ranges
// expected by the Luv2RGB_f converter (L in [0,100], u = src*1.388... - 134,
// v = src*1.027... - 140), runs the float conversion in blocks, then rescales
// the float RGB result back to [0,255] with saturation.
struct Luv2RGB_b
{
    typedef uchar channel_type;

    // _dstcn: number of output channels (3, or 4 to append an opaque alpha).
    // blueIdx, _coeffs, _whitept, _srgb are forwarded to the float converter.
    Luv2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
               const float* _whitept, bool _srgb )
    : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
    {
        // SIMD constants mirror the scalar tail loops in operator().
        #if CV_NEON
        v_scale_inv = vdupq_n_f32(100.f/255.f);     // L: [0,255] -> [0,100]
        v_coeff1 = vdupq_n_f32(1.388235294117647f); // u scale
        v_coeff2 = vdupq_n_f32(1.027450980392157f); // v scale
        v_134 = vdupq_n_f32(134.f);                 // u offset
        v_140 = vdupq_n_f32(140.f);                 // v offset
        v_scale = vdupq_n_f32(255.f);               // float RGB -> uchar
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
        #elif CV_SSE2
        v_scale_inv = _mm_set1_ps(100.f/255.f);
        v_coeff1 = _mm_set1_ps(1.388235294117647f);
        v_coeff2 = _mm_set1_ps(1.027450980392157f);
        v_134 = _mm_set1_ps(134.f);
        v_140 = _mm_set1_ps(140.f);
        v_scale = _mm_set1_ps(255.f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
        #endif
    }

    #if CV_SSE2
    // 16s x 8
    // Widens 8 uint16 lanes of L, u and v to float, applies the uchar->float
    // unpacking described in the class comment, and stores 8 interleaved
    // (L,u,v) triples into buf[0..23] (buf must be 16-byte aligned).
    void process(__m128i v_l, __m128i v_u, __m128i v_v,
                 float * buf) const
    {
        // Zero-extend low/high 16-bit halves to 32-bit ints, convert to float.
        __m128 v_l0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_l, v_zero));
        __m128 v_u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_u, v_zero));
        __m128 v_v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_v, v_zero));

        __m128 v_l1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_l, v_zero));
        __m128 v_u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_u, v_zero));
        __m128 v_v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_v, v_zero));

        // L: scale only; u,v: scale then subtract the packing offset.
        v_l0 = _mm_mul_ps(v_l0, v_scale_inv);
        v_l1 = _mm_mul_ps(v_l1, v_scale_inv);

        v_u0 = _mm_sub_ps(_mm_mul_ps(v_u0, v_coeff1), v_134);
        v_u1 = _mm_sub_ps(_mm_mul_ps(v_u1, v_coeff1), v_134);
        v_v0 = _mm_sub_ps(_mm_mul_ps(v_v0, v_coeff2), v_140);
        v_v1 = _mm_sub_ps(_mm_mul_ps(v_v1, v_coeff2), v_140);

        // Planar -> interleaved L,u,v before the aligned stores.
        _mm_interleave_ps(v_l0, v_l1, v_u0, v_u1, v_v0, v_v1);

        _mm_store_ps(buf, v_l0);
        _mm_store_ps(buf + 4, v_l1);
        _mm_store_ps(buf + 8, v_u0);
        _mm_store_ps(buf + 12, v_u1);
        _mm_store_ps(buf + 16, v_v0);
        _mm_store_ps(buf + 20, v_v1);
    }
    #endif

    // src: n interleaved 8-bit Luv triples; dst: n interleaved 8-bit RGB(A)
    // pixels (dstcn channels each).
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, dcn = dstcn;
        uchar alpha = ColorChannel<uchar>::max();
        // Aligned scratch; pixels are processed in BLOCK_SIZE-sized chunks.
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

            // Stage 1: unpack uchar Luv -> float Luv into buf.
            #if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                uint8x8x3_t v_src = vld3_u8(src + j);
                uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
                           v_t1 = vmovl_u8(v_src.val[1]),
                           v_t2 = vmovl_u8(v_src.val[2]);

                float32x4x3_t v_dst;
                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_coeff1), v_134);
                v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_coeff2), v_140);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_coeff1), v_134);
                v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_coeff2), v_140);
                vst3q_f32(buf + j + 12, v_dst);
            }
            #elif CV_SSE2
            if (haveSIMD)
            {
                // 32 pixels (96 bytes) per iteration. The v_r*/v_g*/v_b*
                // registers hold the L/u/v planes after deinterleaving.
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j));
                    __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16));
                    __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32));
                    __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48));
                    __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + j + 64));
                    __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80));

                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                    // Widen each 8-lane half to 16-bit and unpack to floats.
                    process(_mm_unpacklo_epi8(v_r0, v_zero),
                            _mm_unpacklo_epi8(v_g0, v_zero),
                            _mm_unpacklo_epi8(v_b0, v_zero),
                            buf + j);

                    process(_mm_unpackhi_epi8(v_r0, v_zero),
                            _mm_unpackhi_epi8(v_g0, v_zero),
                            _mm_unpackhi_epi8(v_b0, v_zero),
                            buf + j + 24);

                    process(_mm_unpacklo_epi8(v_r1, v_zero),
                            _mm_unpacklo_epi8(v_g1, v_zero),
                            _mm_unpacklo_epi8(v_b1, v_zero),
                            buf + j + 48);

                    process(_mm_unpackhi_epi8(v_r1, v_zero),
                            _mm_unpackhi_epi8(v_g1, v_zero),
                            _mm_unpackhi_epi8(v_b1, v_zero),
                            buf + j + 72);
                }
            }
            #endif
            // Scalar tail of stage 1.
            for( ; j < dn*3; j += 3 )
            {
                buf[j] = src[j]*(100.f/255.f);
                buf[j+1] = (float)(src[j+1]*1.388235294117647f - 134.f);
                buf[j+2] = (float)(src[j+2]*1.027450980392157f - 140.f);
            }
            // Stage 2: in-place float Luv -> float RGB.
            cvt(buf, buf, dn);

            // Stage 3: scale to [0,255], saturate, store (plus alpha if dcn==4).
            j = 0;
            #if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
                uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));

                if (dcn == 4)
                {
                    uint8x8x4_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    v_dst.val[3] = v_alpha;
                    vst4_u8(dst, v_dst);
                }
                else
                {
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    vst3_u8(dst, v_dst);
                }
            }
            #elif CV_SSE2
            // Vector path only when no alpha has to be inserted.
            if (dcn == 3 && haveSIMD)
            {
                // Process 16 floats at a time regardless of pixel boundaries.
                for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
                {
                    __m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
                    __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
                    __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
                    __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);

                    __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
                                                     _mm_cvtps_epi32(v_src1));
                    __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
                                                     _mm_cvtps_epi32(v_src3));

                    _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
                }

                // Step back to a multiple of 3 so the scalar tail starts on a
                // whole pixel.
                int jr = j % 3;
                if (jr)
                    dst -= jr, j -= jr;
            }
            #endif

            // Scalar tail of stage 3.
            for( ; j < dn*3; j += 3, dst += dcn )
            {
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
                if( dcn == 4 )
                    dst[3] = alpha;
            }
        }
    }

    int dstcn;        // number of output channels (3 or 4)
    Luv2RGB_f cvt;    // float Luv -> RGB converter used for the core math

    #if CV_NEON
    float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_134, v_140;
    uint8x8_t v_alpha;
    #elif CV_SSE2
    __m128 v_scale, v_scale_inv, v_coeff1, v_coeff2, v_134, v_140;
    __m128i v_zero;
    bool haveSIMD;    // runtime SSE2 availability
    #endif
};
6084
6085
6086 ///////////////////////////////////// YUV420 -> RGB /////////////////////////////////////
6087
// BT.601 YUV420 -> RGB coefficients in Q20 fixed point
// (value ~= coefficient * (1 << ITUR_BT_601_SHIFT)).
const int ITUR_BT_601_CY = 1220542;   // 1.164: Y contribution to R, G and B
const int ITUR_BT_601_CUB = 2116026;  // 2.018: U contribution to B
const int ITUR_BT_601_CUG = -409993;  // -0.391: U contribution to G
const int ITUR_BT_601_CVG = -852492;  // -0.813: V contribution to G
const int ITUR_BT_601_CVR = 1673527;  // 1.596: V contribution to R
const int ITUR_BT_601_SHIFT = 20;     // fixed-point fraction bits

// Coefficients for RGB to YUV420p conversion (same Q20 fixed-point scale)
const int ITUR_BT_601_CRY = 269484;   // 0.257: R contribution to Y
const int ITUR_BT_601_CGY = 528482;   // 0.504: G contribution to Y
const int ITUR_BT_601_CBY = 102760;   // 0.098: B contribution to Y
const int ITUR_BT_601_CRU = -155188;  // -0.148: R contribution to U
const int ITUR_BT_601_CGU = -305135;  // -0.291: G contribution to U
const int ITUR_BT_601_CBU = 460324;   // 0.439: B contribution to U (also reused as R contribution to V)
const int ITUR_BT_601_CGV = -385875;  // -0.368: G contribution to V
const int ITUR_BT_601_CBV = -74448;   // -0.071: B contribution to V
6104
// Semi-planar YUV420 (full-resolution Y plane followed by one interleaved,
// half-resolution UV plane) -> 3-channel 8-bit RGB/BGR.
// bIdx is the output index of the blue channel (0 = BGR order, 2 = RGB);
// uIdx is the offset of U inside each interleaved UV byte pair (0 or 1).
// One Range unit corresponds to two output rows, which share one UV row.
template<int bIdx, int uIdx>
struct YUV420sp2RGB888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* my1, *muv;  // base of the Y plane / the interleaved UV plane
    int width, stride;       // destination width; source row stride in bytes

    YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
        : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        // The range counts row pairs; convert to pixel-row indices.
        int rangeBegin = range.start * 2;
        int rangeEnd = range.end * 2;

        //R = 1.164(Y - 16) + 1.596(V - 128)
        //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
        //B = 1.164(Y - 16) + 2.018(U - 128)

        //R = (1220542(Y - 16) + 1673527(V - 128) + (1 << 19)) >> 20
        //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
        //B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20

        // The UV plane has one row per two Y rows, hence stride / 2.
        const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;

#ifdef HAVE_TEGRA_OPTIMIZATION
        // Delegate the whole strip to the Tegra-optimized path when available.
        if(tegra::useTegra() && tegra::cvtYUV4202RGB(bIdx, uIdx, 3, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
            return;
#endif

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            // Each iteration emits a 2x2 pixel block sharing one (U,V) pair.
            for (int i = 0; i < width; i += 2, row1 += 6, row2 += 6)
            {
                int u = int(uv[i + 0 + uIdx]) - 128;
                int v = int(uv[i + 1 - uIdx]) - 128;

                // Chroma contributions, pre-biased with the rounding constant.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);

                int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
                row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);

                int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);

                int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
                row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[4] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
            }
        }
    }
};
6173
// Semi-planar YUV420 -> 4-channel 8-bit RGBA/BGRA. Identical math to
// YUV420sp2RGB888Invoker, but each pixel occupies 4 bytes and the fourth
// channel is set to opaque (0xff).
// bIdx is the output index of the blue channel; uIdx the offset of U inside
// each interleaved UV byte pair. One Range unit covers two output rows.
template<int bIdx, int uIdx>
struct YUV420sp2RGBA8888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* my1, *muv;  // base of the Y plane / the interleaved UV plane
    int width, stride;       // destination width; source row stride in bytes

    YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
        : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        // The range counts row pairs; convert to pixel-row indices.
        int rangeBegin = range.start * 2;
        int rangeEnd = range.end * 2;

        //R = 1.164(Y - 16) + 1.596(V - 128)
        //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
        //B = 1.164(Y - 16) + 2.018(U - 128)

        //R = (1220542(Y - 16) + 1673527(V - 128) + (1 << 19)) >> 20
        //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
        //B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20

        // The UV plane has one row per two Y rows, hence stride / 2.
        const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;

#ifdef HAVE_TEGRA_OPTIMIZATION
        // Delegate the whole strip to the Tegra-optimized path when available.
        if(tegra::useTegra() && tegra::cvtYUV4202RGB(bIdx, uIdx, 4, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
            return;
#endif

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            // Each iteration emits a 2x2 pixel block sharing one (U,V) pair;
            // 4 bytes per output pixel.
            for (int i = 0; i < width; i += 2, row1 += 8, row2 += 8)
            {
                int u = int(uv[i + 0 + uIdx]) - 128;
                int v = int(uv[i + 1 - uIdx]) - 128;

                // Chroma contributions, pre-biased with the rounding constant.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
                row1[3] = uchar(0xff);

                int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
                row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
                row1[7] = uchar(0xff);

                int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
                row2[3] = uchar(0xff);

                int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
                row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[5] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
                row2[7] = uchar(0xff);
            }
        }
    }
};
6246
// Planar YUV420 (separate Y, U and V planes; chroma is quarter resolution)
// -> 3-channel 8-bit RGB/BGR. bIdx is the output index of the blue channel.
// Chroma planes store two half-width rows packed into each full-stride row;
// ustepIdx/vstepIdx give the starting phase within that packing (this is how
// I420 vs YV12 style layouts and odd range starts are handled).
// One Range unit corresponds to two output rows.
template<int bIdx>
struct YUV420p2RGB888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* my1, *mu, *mv;  // base of the Y, U and V planes
    int width, stride;           // destination width; source row stride in bytes
    int ustepIdx, vstepIdx;      // starting phase of the half-row packing

    YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
        : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}

    void operator()(const Range& range) const
    {
        // The range counts row pairs; convert to pixel-row indices.
        const int rangeBegin = range.start * 2;
        const int rangeEnd = range.end * 2;

        // Alternate between advancing half a row and jumping to the start of
        // the next full-stride chroma row.
        int uvsteps[2] = {width/2, stride - width/2};
        int usIdx = ustepIdx, vsIdx = vstepIdx;

        const uchar* y1 = my1 + rangeBegin * stride;
        // Each full-stride chroma row covers four image rows (two row pairs).
        const uchar* u1 = mu + (range.start / 2) * stride;
        const uchar* v1 = mv + (range.start / 2) * stride;

        // Odd range start: skip the first packed half-row.
        if(range.start % 2 == 1)
        {
            u1 += uvsteps[(usIdx++) & 1];
            v1 += uvsteps[(vsIdx++) & 1];
        }

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            // Each iteration emits a 2x2 pixel block sharing one (U,V) pair.
            for (int i = 0; i < width / 2; i += 1, row1 += 6, row2 += 6)
            {
                int u = int(u1[i]) - 128;
                int v = int(v1[i]) - 128;

                // Chroma contributions, pre-biased with the rounding constant.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);

                int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);

                int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);

                int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[4] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
            }
        }
    }
};
6314
// Planar YUV420 -> 4-channel 8-bit RGBA/BGRA. Identical math and chroma
// addressing to YUV420p2RGB888Invoker, but each output pixel occupies 4
// bytes and the fourth channel is set to opaque (0xff).
// bIdx is the output index of the blue channel; ustepIdx/vstepIdx give the
// starting phase of the half-row chroma packing.
template<int bIdx>
struct YUV420p2RGBA8888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* my1, *mu, *mv;  // base of the Y, U and V planes
    int width, stride;           // destination width; source row stride in bytes
    int ustepIdx, vstepIdx;      // starting phase of the half-row packing

    YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
        : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}

    void operator()(const Range& range) const
    {
        // The range counts row pairs; convert to pixel-row indices.
        int rangeBegin = range.start * 2;
        int rangeEnd = range.end * 2;

        // Alternate between advancing half a row and jumping to the start of
        // the next full-stride chroma row.
        int uvsteps[2] = {width/2, stride - width/2};
        int usIdx = ustepIdx, vsIdx = vstepIdx;

        const uchar* y1 = my1 + rangeBegin * stride;
        // Each full-stride chroma row covers four image rows (two row pairs).
        const uchar* u1 = mu + (range.start / 2) * stride;
        const uchar* v1 = mv + (range.start / 2) * stride;

        // Odd range start: skip the first packed half-row.
        if(range.start % 2 == 1)
        {
            u1 += uvsteps[(usIdx++) & 1];
            v1 += uvsteps[(vsIdx++) & 1];
        }

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            // Each iteration emits a 2x2 pixel block sharing one (U,V) pair;
            // 4 bytes per output pixel.
            for (int i = 0; i < width / 2; i += 1, row1 += 8, row2 += 8)
            {
                int u = int(u1[i]) - 128;
                int v = int(v1[i]) - 128;

                // Chroma contributions, pre-biased with the rounding constant.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
                row1[3] = uchar(0xff);

                int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
                row1[7] = uchar(0xff);

                int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
                row2[3] = uchar(0xff);

                int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[5] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
                row2[7] = uchar(0xff);
            }
        }
    }
};
6386
6387 #define MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION (320*240)
6388
6389 template<int bIdx, int uIdx>
cvtYUV420sp2RGB(Mat & _dst,int _stride,const uchar * _y1,const uchar * _uv)6390 inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
6391 {
6392 YUV420sp2RGB888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
6393 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
6394 parallel_for_(Range(0, _dst.rows/2), converter);
6395 else
6396 converter(Range(0, _dst.rows/2));
6397 }
6398
6399 template<int bIdx, int uIdx>
cvtYUV420sp2RGBA(Mat & _dst,int _stride,const uchar * _y1,const uchar * _uv)6400 inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
6401 {
6402 YUV420sp2RGBA8888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
6403 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
6404 parallel_for_(Range(0, _dst.rows/2), converter);
6405 else
6406 converter(Range(0, _dst.rows/2));
6407 }
6408
6409 template<int bIdx>
cvtYUV420p2RGB(Mat & _dst,int _stride,const uchar * _y1,const uchar * _u,const uchar * _v,int ustepIdx,int vstepIdx)6410 inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
6411 {
6412 YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
6413 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
6414 parallel_for_(Range(0, _dst.rows/2), converter);
6415 else
6416 converter(Range(0, _dst.rows/2));
6417 }
6418
6419 template<int bIdx>
cvtYUV420p2RGBA(Mat & _dst,int _stride,const uchar * _y1,const uchar * _u,const uchar * _v,int ustepIdx,int vstepIdx)6420 inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
6421 {
6422 YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
6423 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
6424 parallel_for_(Range(0, _dst.rows/2), converter);
6425 else
6426 converter(Range(0, _dst.rows/2));
6427 }
6428
6429 ///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
6430
// 3/4-channel 8-bit RGB/BGR -> planar YUV420: a full-resolution Y plane
// followed by the subsampled chroma data, stored as half-width rows packed
// two per destination row. bIdx is the index of the blue channel in the
// source (0 = BGR, 2 = RGB); a uIdx of 2 swaps the U and V planes.
// Each Range unit converts one pair of source rows.
template<int bIdx>
struct RGB888toYUV420pInvoker: public ParallelLoopBody
{
    RGB888toYUV420pInvoker( const Mat& src, Mat* dst, const int uIdx )
        : src_(src),
          dst_(dst),
          uIdx_(uIdx) { }

    void operator()(const Range& rowRange) const
    {
        const int w = src_.cols;
        const int h = src_.rows;

        const int cn = src_.channels();
        for( int i = rowRange.start; i < rowRange.end; i++ )
        {
            // Two source rows per iteration; i indexes row pairs.
            const uchar* row0 = src_.ptr<uchar>(2 * i);
            const uchar* row1 = src_.ptr<uchar>(2 * i + 1);

            uchar* y = dst_->ptr<uchar>(2*i);
            // Chroma rows are half-width, so two of them share one dst row;
            // the U data starts at row h, the V data h/2 chroma rows later.
            uchar* u = dst_->ptr<uchar>(h + i/2) + (i % 2) * (w/2);
            uchar* v = dst_->ptr<uchar>(h + (i + h/2)/2) + ((i + h/2) % 2) * (w/2);
            if( uIdx_ == 2 ) std::swap(u, v);

            // k counts 2x2 pixel blocks; j walks source bytes two pixels at
            // a time.
            for( int j = 0, k = 0; j < w * cn; j += 2 * cn, k++ )
            {
                int r00 = row0[2-bIdx + j]; int g00 = row0[1 + j]; int b00 = row0[bIdx + j];
                int r01 = row0[2-bIdx + cn + j]; int g01 = row0[1 + cn + j]; int b01 = row0[bIdx + cn + j];
                int r10 = row1[2-bIdx + j]; int g10 = row1[1 + j]; int b10 = row1[bIdx + j];
                int r11 = row1[2-bIdx + cn + j]; int g11 = row1[1 + cn + j]; int b11 = row1[bIdx + cn + j];

                // Full-resolution luma with rounding and the +16 level shift.
                const int shifted16 = (16 << ITUR_BT_601_SHIFT);
                const int halfShift = (1 << (ITUR_BT_601_SHIFT - 1));
                int y00 = ITUR_BT_601_CRY * r00 + ITUR_BT_601_CGY * g00 + ITUR_BT_601_CBY * b00 + halfShift + shifted16;
                int y01 = ITUR_BT_601_CRY * r01 + ITUR_BT_601_CGY * g01 + ITUR_BT_601_CBY * b01 + halfShift + shifted16;
                int y10 = ITUR_BT_601_CRY * r10 + ITUR_BT_601_CGY * g10 + ITUR_BT_601_CBY * b10 + halfShift + shifted16;
                int y11 = ITUR_BT_601_CRY * r11 + ITUR_BT_601_CGY * g11 + ITUR_BT_601_CBY * b11 + halfShift + shifted16;

                y[2*k + 0] = saturate_cast<uchar>(y00 >> ITUR_BT_601_SHIFT);
                y[2*k + 1] = saturate_cast<uchar>(y01 >> ITUR_BT_601_SHIFT);
                y[2*k + dst_->step + 0] = saturate_cast<uchar>(y10 >> ITUR_BT_601_SHIFT);
                y[2*k + dst_->step + 1] = saturate_cast<uchar>(y11 >> ITUR_BT_601_SHIFT);

                // Chroma is taken from the top-left pixel of each 2x2 block.
                // ITUR_BT_601_CBU doubles as the R coefficient of V (both are
                // 0.439 in BT.601).
                const int shifted128 = (128 << ITUR_BT_601_SHIFT);
                int u00 = ITUR_BT_601_CRU * r00 + ITUR_BT_601_CGU * g00 + ITUR_BT_601_CBU * b00 + halfShift + shifted128;
                int v00 = ITUR_BT_601_CBU * r00 + ITUR_BT_601_CGV * g00 + ITUR_BT_601_CBV * b00 + halfShift + shifted128;

                u[k] = saturate_cast<uchar>(u00 >> ITUR_BT_601_SHIFT);
                v[k] = saturate_cast<uchar>(v00 >> ITUR_BT_601_SHIFT);
            }
        }
    }

    // True when the image is large enough to be worth parallelizing.
    static bool isFit( const Mat& src )
    {
        return (src.total() >= 320*240);
    }

private:
    RGB888toYUV420pInvoker& operator=(const RGB888toYUV420pInvoker&);

    const Mat& src_;
    Mat* const dst_;
    const int uIdx_;  // 2 swaps the U and V planes
};
6496
6497 template<int bIdx, int uIdx>
cvtRGBtoYUV420p(const Mat & src,Mat & dst)6498 static void cvtRGBtoYUV420p(const Mat& src, Mat& dst)
6499 {
6500 RGB888toYUV420pInvoker<bIdx> colorConverter(src, &dst, uIdx);
6501 if( RGB888toYUV420pInvoker<bIdx>::isFit(src) )
6502 parallel_for_(Range(0, src.rows/2), colorConverter);
6503 else
6504 colorConverter(Range(0, src.rows/2));
6505 }
6506
6507 ///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
6508
// Packed YUV 4:2:2 (4 bytes per 2 pixels) -> 3-channel 8-bit RGB/BGR.
// bIdx is the output index of the blue channel. yIdx is the byte offset of
// the first luma sample inside each 4-byte group (e.g. 0 for YUYV-style,
// 1 for UYVY-style layouts); uIdx then selects the U/V order via
// uidx = 1 - yIdx + uIdx*2, vidx = (2 + uidx) % 4.
// One Range unit corresponds to one output row.
template<int bIdx, int uIdx, int yIdx>
struct YUV422toRGB888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* src;
    int width, stride;  // destination width; source row stride in bytes

    YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
        : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        int rangeBegin = range.start;
        int rangeEnd = range.end;

        // Byte offsets of U and V within each 4-byte macropixel.
        const int uidx = 1 - yIdx + uIdx * 2;
        const int vidx = (2 + uidx) % 4;
        const uchar* yuv_src = src + rangeBegin * stride;

        for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
        {
            uchar* row = dst->ptr<uchar>(j);

            // Each 4 source bytes yield two horizontally adjacent pixels
            // sharing one (U,V) pair.
            for (int i = 0; i < 2 * width; i += 4, row += 6)
            {
                int u = int(yuv_src[i + uidx]) - 128;
                int v = int(yuv_src[i + vidx]) - 128;

                // Chroma contributions, pre-biased with the rounding constant.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
                row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);

                int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
                row[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
            }
        }
    }
};
6554
// Packed YUV 4:2:2 -> 4-channel 8-bit RGBA/BGRA. Identical math and source
// layout handling to YUV422toRGB888Invoker, but each output pixel occupies
// 4 bytes and the fourth channel is set to opaque (0xff).
template<int bIdx, int uIdx, int yIdx>
struct YUV422toRGBA8888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* src;
    int width, stride;  // destination width; source row stride in bytes

    YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
        : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        int rangeBegin = range.start;
        int rangeEnd = range.end;

        // Byte offsets of U and V within each 4-byte macropixel.
        const int uidx = 1 - yIdx + uIdx * 2;
        const int vidx = (2 + uidx) % 4;
        const uchar* yuv_src = src + rangeBegin * stride;

        for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
        {
            uchar* row = dst->ptr<uchar>(j);

            // Each 4 source bytes yield two pixels sharing one (U,V) pair;
            // 4 bytes per output pixel.
            for (int i = 0; i < 2 * width; i += 4, row += 8)
            {
                int u = int(yuv_src[i + uidx]) - 128;
                int v = int(yuv_src[i + vidx]) - 128;

                // Chroma contributions, pre-biased with the rounding constant.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
                row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
                row[3] = uchar(0xff);

                int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
                row[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
                row[7] = uchar(0xff);
            }
        }
    }
};
6602
6603 #define MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION (320*240)
6604
6605 template<int bIdx, int uIdx, int yIdx>
cvtYUV422toRGB(Mat & _dst,int _stride,const uchar * _yuv)6606 inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv)
6607 {
6608 YUV422toRGB888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
6609 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
6610 parallel_for_(Range(0, _dst.rows), converter);
6611 else
6612 converter(Range(0, _dst.rows));
6613 }
6614
6615 template<int bIdx, int uIdx, int yIdx>
cvtYUV422toRGBA(Mat & _dst,int _stride,const uchar * _yuv)6616 inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv)
6617 {
6618 YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
6619 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
6620 parallel_for_(Range(0, _dst.rows), converter);
6621 else
6622 converter(Range(0, _dst.rows));
6623 }
6624
6625 /////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
6626
6627 template<typename _Tp>
6628 struct RGBA2mRGBA
6629 {
6630 typedef _Tp channel_type;
6631
operator ()cv::RGBA2mRGBA6632 void operator()(const _Tp* src, _Tp* dst, int n) const
6633 {
6634 _Tp max_val = ColorChannel<_Tp>::max();
6635 _Tp half_val = ColorChannel<_Tp>::half();
6636 for( int i = 0; i < n; i++ )
6637 {
6638 _Tp v0 = *src++;
6639 _Tp v1 = *src++;
6640 _Tp v2 = *src++;
6641 _Tp v3 = *src++;
6642
6643 *dst++ = (v0 * v3 + half_val) / max_val;
6644 *dst++ = (v1 * v3 + half_val) / max_val;
6645 *dst++ = (v2 * v3 + half_val) / max_val;
6646 *dst++ = v3;
6647 }
6648 }
6649 };
6650
6651
6652 template<typename _Tp>
6653 struct mRGBA2RGBA
6654 {
6655 typedef _Tp channel_type;
6656
operator ()cv::mRGBA2RGBA6657 void operator()(const _Tp* src, _Tp* dst, int n) const
6658 {
6659 _Tp max_val = ColorChannel<_Tp>::max();
6660 for( int i = 0; i < n; i++ )
6661 {
6662 _Tp v0 = *src++;
6663 _Tp v1 = *src++;
6664 _Tp v2 = *src++;
6665 _Tp v3 = *src++;
6666 _Tp v3_half = v3 / 2;
6667
6668 *dst++ = (v3==0)? 0 : (v0 * max_val + v3_half) / v3;
6669 *dst++ = (v3==0)? 0 : (v1 * max_val + v3_half) / v3;
6670 *dst++ = (v3==0)? 0 : (v2 * max_val + v3_half) / v3;
6671 *dst++ = v3;
6672 }
6673 }
6674 };
6675
6676 #ifdef HAVE_OPENCL
6677
// OpenCL implementation of cvtColor(). Selects and builds the OpenCL kernel
// matching the conversion 'code', sets its arguments, and runs it.
// Returns true on success; returns false when the conversion is not handled
// here (unsupported depth, kernel build failure, or an unknown code), in
// which case the caller falls back to the CPU path.
// 'dcn' <= 0 means "use the default destination channel count for this code".
ocl_cvtColor(InputArray _src,OutputArray _dst,int code,int dcn)6678 static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
6679 {
6680 bool ok = false;
6681 UMat src = _src.getUMat(), dst;
6682 Size sz = src.size(), dstSz = sz;
6683 int scn = src.channels(), depth = src.depth(), bidx, uidx, yidx;
6684 int dims = 2, stripeSize = 1;
6685 ocl::Kernel k;
6686
// Only these three depths are supported by the OpenCL kernels.
6687 if (depth != CV_8U && depth != CV_16U && depth != CV_32F)
6688 return false;
6689
// On Intel GPUs each work item processes 4 rows (PIX_PER_WI_Y); 1 elsewhere.
6690 ocl::Device dev = ocl::Device::getDefault();
6691 int pxPerWIy = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;
6692 int pxPerWIx = 1;
6693
6694 size_t globalsize[] = { src.cols, (src.rows + pxPerWIy - 1) / pxPerWIy };
6695 cv::String opts = format("-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d ",
6696 depth, scn, pxPerWIy);
6697
// Each case either (a) builds a kernel and breaks — the shared tail after the
// switch then allocates dst, sets args and runs it — or (b) needs custom
// arguments/geometry and allocates + runs the kernel itself, returning early.
6698 switch (code)
6699 {
6700 case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR:
6701 case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA:
6702 {
6703 CV_Assert(scn == 3 || scn == 4);
6704 dcn = code == COLOR_BGR2BGRA || code == COLOR_RGB2BGRA || code == COLOR_BGRA2RGBA ? 4 : 3;
// "reverse" = the R/B channel order flips between src and dst.
6705 bool reverse = !(code == COLOR_BGR2BGRA || code == COLOR_BGRA2BGR);
6706 k.create("RGB", ocl::imgproc::cvtcolor_oclsrc,
6707 opts + format("-D dcn=%d -D bidx=0 -D %s", dcn,
6708 reverse ? "REVERSE" : "ORDER"));
6709 break;
6710 }
// 16-bit packed RGB (565/555) -> 8-bit RGB/RGBA.
6711 case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB:
6712 case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA:
6713 {
6714 dcn = code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA || code == COLOR_BGR5652RGBA || code == COLOR_BGR5552RGBA ? 4 : 3;
6715 CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U);
6716 bidx = code == COLOR_BGR5652BGR || code == COLOR_BGR5552BGR ||
6717 code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA ? 0 : 2;
6718 int greenbits = code == COLOR_BGR5652BGR || code == COLOR_BGR5652RGB ||
6719 code == COLOR_BGR5652BGRA || code == COLOR_BGR5652RGBA ? 6 : 5;
6720 k.create("RGB5x52RGB", ocl::imgproc::cvtcolor_oclsrc,
6721 opts + format("-D dcn=%d -D bidx=%d -D greenbits=%d", dcn, bidx, greenbits));
6722 break;
6723 }
// 8-bit RGB/RGBA -> 16-bit packed RGB (565/555).
6724 case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555:
6725 case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555:
6726 {
6727 CV_Assert((scn == 3 || scn == 4) && depth == CV_8U );
6728 bidx = code == COLOR_BGR2BGR565 || code == COLOR_BGR2BGR555 ||
6729 code == COLOR_BGRA2BGR565 || code == COLOR_BGRA2BGR555 ? 0 : 2;
6730 int greenbits = code == COLOR_BGR2BGR565 || code == COLOR_RGB2BGR565 ||
6731 code == COLOR_BGRA2BGR565 || code == COLOR_RGBA2BGR565 ? 6 : 5;
6732 dcn = 2;
6733 k.create("RGB2RGB5x5", ocl::imgproc::cvtcolor_oclsrc,
6734 opts + format("-D dcn=2 -D bidx=%d -D greenbits=%d", bidx, greenbits));
6735 break;
6736 }
6737 case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY:
6738 {
6739 CV_Assert(scn == 2 && depth == CV_8U);
6740 dcn = 1;
6741 int greenbits = code == COLOR_BGR5652GRAY ? 6 : 5;
6742 k.create("BGR5x52Gray", ocl::imgproc::cvtcolor_oclsrc,
6743 opts + format("-D dcn=1 -D bidx=0 -D greenbits=%d", greenbits));
6744 break;
6745 }
6746 case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555:
6747 {
6748 CV_Assert(scn == 1 && depth == CV_8U);
6749 dcn = 2;
6750 int greenbits = code == COLOR_GRAY2BGR565 ? 6 : 5;
6751 k.create("Gray2BGR5x5", ocl::imgproc::cvtcolor_oclsrc,
6752 opts + format("-D dcn=2 -D bidx=%d -D greenbits=%d", greenbits));
6753 break;
6754 }
6755 case COLOR_BGR2GRAY: case COLOR_BGRA2GRAY:
6756 case COLOR_RGB2GRAY: case COLOR_RGBA2GRAY:
6757 {
6758 CV_Assert(scn == 3 || scn == 4);
6759 bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2;
6760 dcn = 1;
6761 k.create("RGB2Gray", ocl::imgproc::cvtcolor_oclsrc,
6762 opts + format("-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d",
6763 bidx, stripeSize));
// Each work item handles a horizontal stripe of STRIPE_SIZE columns.
6764 globalsize[0] = (src.cols + stripeSize-1)/stripeSize;
6765 break;
6766 }
6767 case COLOR_GRAY2BGR:
6768 case COLOR_GRAY2BGRA:
6769 {
6770 CV_Assert(scn == 1);
6771 dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
6772 k.create("Gray2RGB", ocl::imgproc::cvtcolor_oclsrc,
6773 opts + format("-D bidx=0 -D dcn=%d", dcn));
6774 break;
6775 }
6776 case COLOR_BGR2YUV:
6777 case COLOR_RGB2YUV:
6778 {
6779 CV_Assert(scn == 3 || scn == 4);
6780 bidx = code == COLOR_RGB2YUV ? 0 : 2;
6781 dcn = 3;
6782 k.create("RGB2YUV", ocl::imgproc::cvtcolor_oclsrc,
6783 opts + format("-D dcn=3 -D bidx=%d", bidx));
6784 break;
6785 }
6786 case COLOR_YUV2BGR:
6787 case COLOR_YUV2RGB:
6788 {
6789 if(dcn < 0) dcn = 3;
6790 CV_Assert(dcn == 3 || dcn == 4);
6791 bidx = code == COLOR_YUV2RGB ? 0 : 2;
6792 k.create("YUV2RGB", ocl::imgproc::cvtcolor_oclsrc,
6793 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx));
6794 break;
6795 }
// Semi-planar 4:2:0 (NV12/NV21: Y plane + interleaved UV plane) -> RGB(A).
6796 case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGR_NV12: case COLOR_YUV2RGB_NV21: case COLOR_YUV2BGR_NV21:
6797 case COLOR_YUV2RGBA_NV12: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV21: case COLOR_YUV2BGRA_NV21:
6798 {
6799 CV_Assert( scn == 1 );
6800 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
6801 dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ||
6802 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2RGBA_NV21 ? 4 : 3;
6803 bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 ||
6804 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 0 : 2;
// uidx: position of U within each UV pair (NV21 stores V first).
6805 uidx = code == COLOR_YUV2RGBA_NV21 || code == COLOR_YUV2RGB_NV21 ||
6806 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 1 : 0;
6807
// Source holds Y + UV planes stacked: output height is 2/3 of input height.
// Each work item produces a 2x2 pixel block, hence the /2 in globalsize.
6808 dstSz = Size(sz.width, sz.height * 2 / 3);
6809 globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy;
6810 k.create("YUV2RGB_NVx", ocl::imgproc::cvtcolor_oclsrc,
6811 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx));
6812 break;
6813 }
// Planar 4:2:0 (YV12/IYUV a.k.a. I420) -> RGB(A).
6814 case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12:
6815 case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV:
6816 {
6817 CV_Assert( scn == 1 );
6818 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
6819 dcn = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2RGBA_YV12 ||
6820 code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2RGBA_IYUV ? 4 : 3;
6821 bidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 ||
6822 code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2BGR_IYUV ? 0 : 2;
// uidx=1 marks YV12 plane order (V plane before U); 0 is IYUV order.
6823 uidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 ||
6824 code == COLOR_YUV2RGBA_YV12 || code == COLOR_YUV2RGB_YV12 ? 1 : 0;
6825
6826 dstSz = Size(sz.width, sz.height * 2 / 3);
6827 globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy;
6828 k.create("YUV2RGB_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc,
6829 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d%s", dcn, bidx, uidx,
6830 src.isContinuous() ? " -D SRC_CONT" : ""));
6831 break;
6832 }
6833 case COLOR_YUV2GRAY_420:
6834 {
6835 if (dcn <= 0) dcn = 1;
6836
6837 CV_Assert( dcn == 1 );
6838 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
6839
// Gray output is just the Y plane: a row-range copy, no kernel needed.
6840 dstSz = Size(sz.width, sz.height * 2 / 3);
6841 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
6842 dst = _dst.getUMat();
6843
6844 src.rowRange(0, dstSz.height).copyTo(dst);
6845 return true;
6846 }
// RGB(A) -> planar 4:2:0; runs the kernel itself (custom globalsize).
6847 case COLOR_RGB2YUV_YV12: case COLOR_BGR2YUV_YV12: case COLOR_RGBA2YUV_YV12: case COLOR_BGRA2YUV_YV12:
6848 case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV:
6849 {
6850 if (dcn <= 0) dcn = 1;
6851 bidx = code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 ||
6852 code == COLOR_BGRA2YUV_IYUV || code == COLOR_BGR2YUV_IYUV ? 0 : 2;
6853 uidx = code == COLOR_RGBA2YUV_YV12 || code == COLOR_RGB2YUV_YV12 ||
6854 code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 ? 1 : 0;
6855
6856 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
6857 CV_Assert( dcn == 1 );
6858 CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 );
6859
// Output holds Y + U + V planes stacked: height grows by 3/2.
6860 dstSz = Size(sz.width, sz.height / 2 * 3);
6861 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
6862 dst = _dst.getUMat();
6863
// On Intel devices with 4-byte-aligned buffers, process 2 pixel pairs per
// work item in x for better memory throughput.
6864 if (dev.isIntel() && src.cols % 4 == 0 && src.step % 4 == 0 && src.offset % 4 == 0 &&
6865 dst.step % 4 == 0 && dst.offset % 4 == 0)
6866 {
6867 pxPerWIx = 2;
6868 }
6869 globalsize[0] = dstSz.width / (2 * pxPerWIx); globalsize[1] = (dstSz.height/3 + pxPerWIy - 1) / pxPerWIy;
6870
6871 k.create("RGB2YUV_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc,
6872 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D PIX_PER_WI_X=%d", dcn, bidx, uidx, pxPerWIx));
6873 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst));
6874 return k.run(2, globalsize, NULL, false);
6875 }
// Packed 4:2:2 (UYVY/YUY2/YVYU) -> RGB(A).
6876 case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY:
6877 case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2: case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU:
6878 case COLOR_YUV2RGBA_YUY2: case COLOR_YUV2BGRA_YUY2: case COLOR_YUV2RGBA_YVYU: case COLOR_YUV2BGRA_YVYU:
6879 {
6880 if (dcn <= 0)
6881 dcn = (code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2RGBA_YUY2 ||
6882 code==COLOR_YUV2BGRA_YUY2 || code==COLOR_YUV2RGBA_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 4 : 3;
6883
6884 bidx = (code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2BGRA_YUY2 ||
6885 code==COLOR_YUV2BGR_YUY2 || code==COLOR_YUV2BGRA_YVYU || code==COLOR_YUV2BGR_YVYU) ? 0 : 2;
// yidx: byte offset of Y inside each 2-byte macropixel (UYVY stores Y second);
// uidx below is then U's offset relative to the chroma bytes.
6886 yidx = (code==COLOR_YUV2RGB_UYVY || code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY) ? 1 : 0;
6887 uidx = (code==COLOR_YUV2RGB_YVYU || code==COLOR_YUV2RGBA_YVYU ||
6888 code==COLOR_YUV2BGR_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 2 : 0;
6889 uidx = 1 - yidx + uidx;
6890
6891 CV_Assert( dcn == 3 || dcn == 4 );
6892 CV_Assert( scn == 2 && depth == CV_8U );
6893
6894 k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc,
6895 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d%s", dcn, bidx, uidx, yidx,
6896 src.offset % 4 == 0 && src.step % 4 == 0 ? " -D USE_OPTIMIZED_LOAD" : ""));
6897 break;
6898 }
6899 case COLOR_BGR2YCrCb:
6900 case COLOR_RGB2YCrCb:
6901 {
6902 CV_Assert(scn == 3 || scn == 4);
6903 bidx = code == COLOR_BGR2YCrCb ? 0 : 2;
6904 dcn = 3;
6905 k.create("RGB2YCrCb", ocl::imgproc::cvtcolor_oclsrc,
6906 opts + format("-D dcn=3 -D bidx=%d", bidx));
6907 break;
6908 }
6909 case COLOR_YCrCb2BGR:
6910 case COLOR_YCrCb2RGB:
6911 {
6912 if( dcn <= 0 )
6913 dcn = 3;
6914 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
6915 bidx = code == COLOR_YCrCb2BGR ? 0 : 2;
6916 k.create("YCrCb2RGB", ocl::imgproc::cvtcolor_oclsrc,
6917 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx));
6918 break;
6919 }
// RGB -> XYZ; uploads the 3x3 coefficient matrix and runs the kernel itself.
6920 case COLOR_BGR2XYZ: case COLOR_RGB2XYZ:
6921 {
6922 CV_Assert(scn == 3 || scn == 4);
6923 bidx = code == COLOR_BGR2XYZ ? 0 : 2;
6924
6925 UMat c;
6926 if (depth == CV_32F)
6927 {
6928 float coeffs[] =
6929 {
6930 0.412453f, 0.357580f, 0.180423f,
6931 0.212671f, 0.715160f, 0.072169f,
6932 0.019334f, 0.119193f, 0.950227f
6933 };
// BGR input: swap the R and B columns of each matrix row.
6934 if (bidx == 0)
6935 {
6936 std::swap(coeffs[0], coeffs[2]);
6937 std::swap(coeffs[3], coeffs[5]);
6938 std::swap(coeffs[6], coeffs[8]);
6939 }
6940 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
6941 }
6942 else
6943 {
// Fixed-point coefficients for integer depths.
6944 int coeffs[] =
6945 {
6946 1689, 1465, 739,
6947 871, 2929, 296,
6948 79, 488, 3892
6949 };
6950 if (bidx == 0)
6951 {
6952 std::swap(coeffs[0], coeffs[2]);
6953 std::swap(coeffs[3], coeffs[5]);
6954 std::swap(coeffs[6], coeffs[8]);
6955 }
6956 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
6957 }
6958
6959 _dst.create(dstSz, CV_MAKETYPE(depth, 3));
6960 dst = _dst.getUMat();
6961
6962 k.create("RGB2XYZ", ocl::imgproc::cvtcolor_oclsrc,
6963 opts + format("-D dcn=3 -D bidx=%d", bidx));
6964 if (k.empty())
6965 return false;
6966 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c));
6967 return k.run(2, globalsize, 0, false);
6968 }
// XYZ -> RGB; same structure as above with the inverse matrix.
6969 case COLOR_XYZ2BGR: case COLOR_XYZ2RGB:
6970 {
6971 if (dcn <= 0)
6972 dcn = 3;
6973 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
6974 bidx = code == COLOR_XYZ2BGR ? 0 : 2;
6975
6976 UMat c;
6977 if (depth == CV_32F)
6978 {
6979 float coeffs[] =
6980 {
6981 3.240479f, -1.53715f, -0.498535f,
6982 -0.969256f, 1.875991f, 0.041556f,
6983 0.055648f, -0.204043f, 1.057311f
6984 };
// BGR output: swap the rows producing R and B.
6985 if (bidx == 0)
6986 {
6987 std::swap(coeffs[0], coeffs[6]);
6988 std::swap(coeffs[1], coeffs[7]);
6989 std::swap(coeffs[2], coeffs[8]);
6990 }
6991 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
6992 }
6993 else
6994 {
6995 int coeffs[] =
6996 {
6997 13273, -6296, -2042,
6998 -3970, 7684, 170,
6999 228, -836, 4331
7000 };
7001 if (bidx == 0)
7002 {
7003 std::swap(coeffs[0], coeffs[6]);
7004 std::swap(coeffs[1], coeffs[7]);
7005 std::swap(coeffs[2], coeffs[8]);
7006 }
7007 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
7008 }
7009
7010 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
7011 dst = _dst.getUMat();
7012
7013 k.create("XYZ2RGB", ocl::imgproc::cvtcolor_oclsrc,
7014 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx));
7015 if (k.empty())
7016 return false;
7017 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c));
7018 return k.run(2, globalsize, 0, false);
7019 }
7020 case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL:
7021 case COLOR_BGR2HLS: case COLOR_RGB2HLS: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL:
7022 {
7023 CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F));
7024 bidx = code == COLOR_BGR2HSV || code == COLOR_BGR2HLS ||
7025 code == COLOR_BGR2HSV_FULL || code == COLOR_BGR2HLS_FULL ? 0 : 2;
// hrange: hue range of the output — 360 for float, 180 or 256 ("FULL") for 8U.
7026 int hrange = depth == CV_32F ? 360 : code == COLOR_BGR2HSV || code == COLOR_RGB2HSV ||
7027 code == COLOR_BGR2HLS || code == COLOR_RGB2HLS ? 180 : 256;
7028 bool is_hsv = code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || code == COLOR_BGR2HSV_FULL || code == COLOR_RGB2HSV_FULL;
7029 String kernelName = String("RGB2") + (is_hsv ? "HSV" : "HLS");
7030 dcn = 3;
7031
// 8U HSV uses precomputed fixed-point division LUTs, cached in static
// UMats and built lazily on first use (guarded by the initialized flags).
7032 if (is_hsv && depth == CV_8U)
7033 {
7034 static UMat sdiv_data;
7035 static UMat hdiv_data180;
7036 static UMat hdiv_data256;
7037 static int sdiv_table[256];
7038 static int hdiv_table180[256];
7039 static int hdiv_table256[256];
7040 static volatile bool initialized180 = false, initialized256 = false;
7041 volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;
7042
7043 if (!initialized)
7044 {
7045 int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12;
7046 UMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256;
7047
7048 sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
7049
7050 int v = 255 << hsv_shift;
// The saturation LUT is shared by both hue ranges: build it only once.
7051 if (!initialized180 && !initialized256)
7052 {
7053 for(int i = 1; i < 256; i++ )
7054 sdiv_table[i] = saturate_cast<int>(v/(1.*i));
7055 Mat(1, 256, CV_32SC1, sdiv_table).copyTo(sdiv_data);
7056 }
7057
7058 v = hrange << hsv_shift;
7059 for (int i = 1; i < 256; i++ )
7060 hdiv_table[i] = saturate_cast<int>(v/(6.*i));
7061
7062 Mat(1, 256, CV_32SC1, hdiv_table).copyTo(hdiv_data);
7063 initialized = true;
7064 }
7065
7066 _dst.create(dstSz, CV_8UC3);
7067 dst = _dst.getUMat();
7068
7069 k.create("RGB2HSV", ocl::imgproc::cvtcolor_oclsrc,
7070 opts + format("-D hrange=%d -D bidx=%d -D dcn=3",
7071 hrange, bidx));
7072 if (k.empty())
7073 return false;
7074
7075 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst),
7076 ocl::KernelArg::PtrReadOnly(sdiv_data), hrange == 256 ? ocl::KernelArg::PtrReadOnly(hdiv_data256) :
7077 ocl::KernelArg::PtrReadOnly(hdiv_data180));
7078
7079 return k.run(2, globalsize, NULL, false);
7080 }
7081 else
// Float HSV and all HLS variants take the common path after the switch.
7082 k.create(kernelName.c_str(), ocl::imgproc::cvtcolor_oclsrc,
7083 opts + format("-D hscale=%ff -D bidx=%d -D dcn=3",
7084 hrange*(1.f/360.f), bidx));
7085 break;
7086 }
7087 case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
7088 case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL:
7089 {
7090 if (dcn <= 0)
7091 dcn = 3;
7092 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F));
7093 bidx = code == COLOR_HSV2BGR || code == COLOR_HLS2BGR ||
7094 code == COLOR_HSV2BGR_FULL || code == COLOR_HLS2BGR_FULL ? 0 : 2;
7095 int hrange = depth == CV_32F ? 360 : code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
7096 code == COLOR_HLS2BGR || code == COLOR_HLS2RGB ? 180 : 255;
7097 bool is_hsv = code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
7098 code == COLOR_HSV2BGR_FULL || code == COLOR_HSV2RGB_FULL;
7099
7100 String kernelName = String(is_hsv ? "HSV" : "HLS") + "2RGB";
7101 k.create(kernelName.c_str(), ocl::imgproc::cvtcolor_oclsrc,
7102 opts + format("-D dcn=%d -D bidx=%d -D hrange=%d -D hscale=%ff",
7103 dcn, bidx, hrange, 6.f/hrange));
7104 break;
7105 }
7106 case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA:
7107 {
7108 CV_Assert(scn == 4 && depth == CV_8U);
7109 dcn = 4;
7110
7111 k.create(code == COLOR_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA", ocl::imgproc::cvtcolor_oclsrc,
7112 opts + "-D dcn=4 -D bidx=3");
7113 break;
7114 }
// RGB -> Lab/Luv; "L" variants skip the sRGB gamma step. Runs the kernel
// itself because the argument list depends on depth/lab/srgb.
7115 case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab:
7116 case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv:
7117 {
7118 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
7119
7120 bidx = code == CV_BGR2Lab || code == CV_LBGR2Lab || code == CV_BGR2Luv || code == CV_LBGR2Luv ? 0 : 2;
7121 bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab || code == CV_RGB2Luv || code == CV_BGR2Luv;
7122 bool lab = code == CV_BGR2Lab || code == CV_RGB2Lab || code == CV_LBGR2Lab || code == CV_LRGB2Lab;
7123 float un, vn;
7124 dcn = 3;
7125
7126 k.create(format("BGR2%s", lab ? "Lab" : "Luv").c_str(),
7127 ocl::imgproc::cvtcolor_oclsrc,
7128 opts + format("-D dcn=%d -D bidx=%d%s",
7129 dcn, bidx, srgb ? " -D SRGB" : ""));
7130 if (k.empty())
7131 return false;
7132
// Builds the CPU-side gamma/cbrt tables this path uploads below.
7133 initLabTabs();
7134
7135 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
7136 dst = _dst.getUMat();
7137
7138 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
7139 dstarg = ocl::KernelArg::WriteOnly(dst);
7140
// 8U Lab: fixed-point path with 16-bit LUTs cached in static UMats.
7141 if (depth == CV_8U && lab)
7142 {
7143 static UMat usRGBGammaTab, ulinearGammaTab, uLabCbrtTab, ucoeffs;
7144
7145 if (srgb && usRGBGammaTab.empty())
7146 Mat(1, 256, CV_16UC1, sRGBGammaTab_b).copyTo(usRGBGammaTab);
7147 else if (ulinearGammaTab.empty())
7148 Mat(1, 256, CV_16UC1, linearGammaTab_b).copyTo(ulinearGammaTab);
7149 if (uLabCbrtTab.empty())
7150 Mat(1, LAB_CBRT_TAB_SIZE_B, CV_16UC1, LabCbrtTab_b).copyTo(uLabCbrtTab);
7151
7152 {
7153 int coeffs[9];
7154 const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65;
7155 const float scale[] =
7156 {
7157 (1 << lab_shift)/_whitept[0],
7158 (float)(1 << lab_shift),
7159 (1 << lab_shift)/_whitept[2]
7160 };
7161
// Permute coefficient columns so the kernel can index by channel position.
7162 for (int i = 0; i < 3; i++ )
7163 {
7164 coeffs[i*3+(bidx^2)] = cvRound(_coeffs[i*3]*scale[i]);
7165 coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]);
7166 coeffs[i*3+bidx] = cvRound(_coeffs[i*3+2]*scale[i]);
7167
7168 CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
7169 coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) );
7170 }
7171 Mat(1, 9, CV_32SC1, coeffs).copyTo(ucoeffs);
7172 }
7173
7174 const int Lscale = (116*255+50)/100;
7175 const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100);
7176
7177 k.args(srcarg, dstarg,
7178 ocl::KernelArg::PtrReadOnly(srgb ? usRGBGammaTab : ulinearGammaTab),
7179 ocl::KernelArg::PtrReadOnly(uLabCbrtTab), ocl::KernelArg::PtrReadOnly(ucoeffs),
7180 Lscale, Lshift);
7181 }
7182 else
7183 {
// Float path (and 8U Luv): float LUTs and white-point-scaled coefficients.
7184 static UMat usRGBGammaTab, ucoeffs, uLabCbrtTab;
7185
7186 if (srgb && usRGBGammaTab.empty())
7187 Mat(1, GAMMA_TAB_SIZE * 4, CV_32FC1, sRGBGammaTab).copyTo(usRGBGammaTab);
7188 if (!lab && uLabCbrtTab.empty())
7189 Mat(1, LAB_CBRT_TAB_SIZE * 4, CV_32FC1, LabCbrtTab).copyTo(uLabCbrtTab);
7190
7191 {
7192 float coeffs[9];
7193 const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65;
7194 float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] };
7195
7196 for (int i = 0; i < 3; i++)
7197 {
7198 int j = i * 3;
7199 coeffs[j + (bidx ^ 2)] = _coeffs[j] * (lab ? scale[i] : 1);
7200 coeffs[j + 1] = _coeffs[j + 1] * (lab ? scale[i] : 1);
7201 coeffs[j + bidx] = _coeffs[j + 2] * (lab ? scale[i] : 1);
7202
7203 CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 &&
7204 coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*(lab ? LabCbrtTabScale : 1) );
7205 }
7206
// u'n / v'n chromaticity constants for Luv, derived from the white point.
7207 float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
7208 un = 13*4*_whitept[0]*d;
7209 vn = 13*9*_whitept[1]*d;
7210
7211 Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
7212 }
7213
7214 float _1_3 = 1.0f / 3.0f, _a = 16.0f / 116.0f;
7215 ocl::KernelArg ucoeffsarg = ocl::KernelArg::PtrReadOnly(ucoeffs);
7216
7217 if (lab)
7218 {
7219 if (srgb)
7220 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBGammaTab),
7221 ucoeffsarg, _1_3, _a);
7222 else
7223 k.args(srcarg, dstarg, ucoeffsarg, _1_3, _a);
7224 }
7225 else
7226 {
7227 ocl::KernelArg LabCbrtTabarg = ocl::KernelArg::PtrReadOnly(uLabCbrtTab);
7228 if (srgb)
7229 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBGammaTab),
7230 LabCbrtTabarg, ucoeffsarg, un, vn);
7231 else
7232 k.args(srcarg, dstarg, LabCbrtTabarg, ucoeffsarg, un, vn);
7233 }
7234 }
7235
7236 return k.run(dims, globalsize, NULL, false);
7237 }
// Lab/Luv -> RGB: inverse transform, also runs the kernel itself.
7238 case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB:
7239 case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB:
7240 {
7241 if( dcn <= 0 )
7242 dcn = 3;
7243 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
7244
7245 bidx = code == CV_Lab2BGR || code == CV_Lab2LBGR || code == CV_Luv2BGR || code == CV_Luv2LBGR ? 0 : 2;
7246 bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB || code == CV_Luv2BGR || code == CV_Luv2RGB;
7247 bool lab = code == CV_Lab2BGR || code == CV_Lab2RGB || code == CV_Lab2LBGR || code == CV_Lab2LRGB;
7248 float un, vn;
7249
7250 k.create(format("%s2BGR", lab ? "Lab" : "Luv").c_str(),
7251 ocl::imgproc::cvtcolor_oclsrc,
7252 opts + format("-D dcn=%d -D bidx=%d%s",
7253 dcn, bidx, srgb ? " -D SRGB" : ""));
7254 if (k.empty())
7255 return false;
7256
7257 initLabTabs();
7258 static UMat ucoeffs, usRGBInvGammaTab;
7259
7260 if (srgb && usRGBInvGammaTab.empty())
7261 Mat(1, GAMMA_TAB_SIZE*4, CV_32FC1, sRGBInvGammaTab).copyTo(usRGBInvGammaTab);
7262
7263 {
7264 float coeffs[9];
7265 const float * const _coeffs = XYZ2sRGB_D65, * const _whitept = D65;
7266
// Permute coefficient rows for the output channel order.
7267 for( int i = 0; i < 3; i++ )
7268 {
7269 coeffs[i+(bidx^2)*3] = _coeffs[i] * (lab ? _whitept[i] : 1);
7270 coeffs[i+3] = _coeffs[i+3] * (lab ? _whitept[i] : 1);
7271 coeffs[i+bidx*3] = _coeffs[i+6] * (lab ? _whitept[i] : 1);
7272 }
7273
7274 float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
7275 un = 4*_whitept[0]*d;
7276 vn = 9*_whitept[1]*d;
7277
7278 Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
7279 }
7280
7281 _dst.create(sz, CV_MAKETYPE(depth, dcn));
7282 dst = _dst.getUMat();
7283
// Thresholds separating the linear and cubic branches of the inverse f(t).
7284 float lThresh = 0.008856f * 903.3f;
7285 float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
7286
7287 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
7288 dstarg = ocl::KernelArg::WriteOnly(dst),
7289 coeffsarg = ocl::KernelArg::PtrReadOnly(ucoeffs);
7290
7291 if (lab)
7292 {
7293 if (srgb)
7294 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBInvGammaTab),
7295 coeffsarg, lThresh, fThresh);
7296 else
7297 k.args(srcarg, dstarg, coeffsarg, lThresh, fThresh);
7298 }
7299 else
7300 {
7301 if (srgb)
7302 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBInvGammaTab),
7303 coeffsarg, un, vn);
7304 else
7305 k.args(srcarg, dstarg, coeffsarg, un, vn);
7306 }
7307
7308 return k.run(dims, globalsize, NULL, false);
7309 }
7310 default:
7311 break;
7312 }
7313
// Common tail for all cases that only built a kernel: allocate dst with the
// size/type chosen above, bind the default (src, dst) arguments, and run.
7314 if( !k.empty() )
7315 {
7316 _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
7317 dst = _dst.getUMat();
7318 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst));
7319 ok = k.run(dims, globalsize, NULL, false);
7320 }
7321 return ok;
7322 }
7323
7324 #endif
7325
7326 }//namespace cv
7327
7328 //////////////////////////////////////////////////////////////////////////////////////////
7329 // The main function //
7330 //////////////////////////////////////////////////////////////////////////////////////////
7331
cvtColor(InputArray _src,OutputArray _dst,int code,int dcn)7332 void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
7333 {
7334 int stype = _src.type();
7335 int scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype), bidx;
7336
7337 CV_OCL_RUN( _src.dims() <= 2 && _dst.isUMat() && !(depth == CV_8U && (code == CV_Luv2BGR || code == CV_Luv2RGB)),
7338 ocl_cvtColor(_src, _dst, code, dcn) )
7339
7340 Mat src = _src.getMat(), dst;
7341 Size sz = src.size();
7342
7343 CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32F );
7344
7345 switch( code )
7346 {
7347 case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR:
7348 case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
7349 CV_Assert( scn == 3 || scn == 4 );
7350 dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3;
7351 bidx = code == CV_BGR2BGRA || code == CV_BGRA2BGR ? 0 : 2;
7352
7353 _dst.create( sz, CV_MAKETYPE(depth, dcn));
7354 dst = _dst.getMat();
7355
7356 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
7357 CV_IPP_CHECK()
7358 {
7359 if( code == CV_BGR2BGRA)
7360 {
7361 if ( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 0, 1, 2)) )
7362 {
7363 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7364 return;
7365 }
7366 setIppErrorStatus();
7367 }
7368 else if( code == CV_BGRA2BGR )
7369 {
7370 if ( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiCopyAC4C3RTab[depth])) )
7371 {
7372 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7373 return;
7374 }
7375 setIppErrorStatus();
7376 }
7377 else if( code == CV_BGR2RGBA )
7378 {
7379 if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 2, 1, 0)) )
7380 {
7381 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7382 return;
7383 }
7384 setIppErrorStatus();
7385 }
7386 else if( code == CV_RGBA2BGR )
7387 {
7388 if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC4C3RTab[depth], 2, 1, 0)) )
7389 {
7390 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7391 return;
7392 }
7393 setIppErrorStatus();
7394 }
7395 else if( code == CV_RGB2BGR )
7396 {
7397 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) )
7398 {
7399 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7400 return;
7401 }
7402 setIppErrorStatus();
7403 }
7404 #if IPP_VERSION_X100 >= 801
7405 else if( code == CV_RGBA2BGRA )
7406 {
7407 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) )
7408 {
7409 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7410 return;
7411 }
7412 setIppErrorStatus();
7413 }
7414 #endif
7415 }
7416 #endif
7417
7418 if( depth == CV_8U )
7419 {
7420 #ifdef HAVE_TEGRA_OPTIMIZATION
7421 if(tegra::useTegra() && tegra::cvtBGR2RGB(src, dst, bidx))
7422 break;
7423 #endif
7424 CvtColorLoop(src, dst, RGB2RGB<uchar>(scn, dcn, bidx));
7425 }
7426 else if( depth == CV_16U )
7427 CvtColorLoop(src, dst, RGB2RGB<ushort>(scn, dcn, bidx));
7428 else
7429 CvtColorLoop(src, dst, RGB2RGB<float>(scn, dcn, bidx));
7430 break;
7431
7432 case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
7433 case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
7434 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
7435 _dst.create(sz, CV_8UC2);
7436 dst = _dst.getMat();
7437
7438 #if defined(HAVE_IPP) && 0 // breaks OCL accuracy tests
7439 CV_IPP_CHECK()
7440 {
7441 CV_SUPPRESS_DEPRECATED_START
7442
7443 if (code == CV_BGR2BGR565 && scn == 3)
7444 {
7445 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R)))
7446 {
7447 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7448 return;
7449 }
7450 setIppErrorStatus();
7451 }
7452 else if (code == CV_BGRA2BGR565 && scn == 4)
7453 {
7454 if (CvtColorIPPLoopCopy(src, dst,
7455 IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
7456 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 0, 1, 2, depth)))
7457 {
7458 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7459 return;
7460 }
7461 setIppErrorStatus();
7462 }
7463 else if (code == CV_RGB2BGR565 && scn == 3)
7464 {
7465 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
7466 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 2, 1, 0, depth)) )
7467 {
7468 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7469 return;
7470 }
7471 setIppErrorStatus();
7472 }
7473 else if (code == CV_RGBA2BGR565 && scn == 4)
7474 {
7475 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
7476 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 2, 1, 0, depth)) )
7477 {
7478 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7479 return;
7480 }
7481 setIppErrorStatus();
7482 }
7483 CV_SUPPRESS_DEPRECATED_END
7484 }
7485 #endif
7486
7487 #ifdef HAVE_TEGRA_OPTIMIZATION
7488 if(code == CV_BGR2BGR565 || code == CV_BGRA2BGR565 || code == CV_RGB2BGR565 || code == CV_RGBA2BGR565)
7489 if(tegra::useTegra() && tegra::cvtRGB2RGB565(src, dst, code == CV_RGB2BGR565 || code == CV_RGBA2BGR565 ? 0 : 2))
7490 break;
7491 #endif
7492
7493 CvtColorLoop(src, dst, RGB2RGB5x5(scn,
7494 code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
7495 code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,
7496 code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
7497 code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5 // green bits
7498 ));
7499 break;
7500
        // Unpack 16-bit packed BGR565/BGR555 (stored as a 2-channel 8U image)
        // into 8-bit BGR/RGB with 3 or 4 destination channels.
        case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
        case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
            // Default the destination channel count from the conversion code.
            if(dcn <= 0) dcn = (code==CV_BGR5652BGRA || code==CV_BGR5552BGRA || code==CV_BGR5652RGBA || code==CV_BGR5552RGBA) ? 4 : 3;
            CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );
            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

#ifdef HAVE_IPP
            CV_IPP_CHECK()
            {
                CV_SUPPRESS_DEPRECATED_START
                // Try IPP fast paths for the 565 layouts; on failure record the
                // IPP error and fall through to the generic loop below.
                if (code == CV_BGR5652BGR && dcn == 3)
                {
                    if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_BGR5652RGB && dcn == 3)
                {
                    if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R,
                                                                           ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_BGR5652BGRA && dcn == 4)
                {
                    if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R,
                                                                           ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_BGR5652RGBA && dcn == 4)
                {
                    if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R,
                                                                           ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                CV_SUPPRESS_DEPRECATED_END
            }
#endif

            // Generic fallback: blue index and green-bit count are derived
            // from the conversion code.
            CvtColorLoop(src, dst, RGB5x52RGB(dcn,
                      code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
                      code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2, // blue idx
                      code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
                      code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5 // green bits
                      ));
            break;
7562
        // BGR/RGB(A) -> single-channel gray.
        case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:
            CV_Assert( scn == 3 || scn == 4 );
            _dst.create(sz, CV_MAKETYPE(depth, 1));
            dst = _dst.getMat();

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
            CV_IPP_CHECK()
            {
                // IPP fast paths are only attempted for CV_32F input here.
                if( code == CV_BGR2GRAY && depth == CV_32F )
                {
                    if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if( code == CV_RGB2GRAY && depth == CV_32F )
                {
                    if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if( code == CV_BGRA2GRAY && depth == CV_32F )
                {
                    if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC4Tab[depth])) )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if( code == CV_RGBA2GRAY && depth == CV_32F )
                {
                    if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC4Tab[depth])) )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
            }
#endif

            // Position of the blue channel in the source layout.
            bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;

            if( depth == CV_8U )
            {
#ifdef HAVE_TEGRA_OPTIMIZATION
                if(tegra::useTegra() && tegra::cvtRGB2Gray(src, dst, bidx))
                    break;
#endif
                CvtColorLoop(src, dst, RGB2Gray<uchar>(scn, bidx, 0));
            }
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, RGB2Gray<ushort>(scn, bidx, 0));
            else
                CvtColorLoop(src, dst, RGB2Gray<float>(scn, bidx, 0));
            break;
7625
        // Packed 16-bit RGB (565/555) -> gray; the green-bit count selects
        // which packed layout to decode.
        case CV_BGR5652GRAY: case CV_BGR5552GRAY:
            CV_Assert( scn == 2 && depth == CV_8U );
            _dst.create(sz, CV_8UC1);
            dst = _dst.getMat();

            CvtColorLoop(src, dst, RGB5x52Gray(code == CV_BGR5652GRAY ? 6 : 5));
            break;
7633
        // Gray -> BGR/BGRA: replicate the single channel into 3 or 4 channels.
        case CV_GRAY2BGR: case CV_GRAY2BGRA:
            if( dcn <= 0 ) dcn = (code==CV_GRAY2BGRA) ? 4 : 3;
            CV_Assert( scn == 1 && (dcn == 3 || dcn == 4));
            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
            CV_IPP_CHECK()
            {
                if( code == CV_GRAY2BGR )
                {
                    if( CvtColorIPPLoop(src, dst, IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if( code == CV_GRAY2BGRA )
                {
                    if( CvtColorIPPLoop(src, dst, IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
            }
#endif

            if( depth == CV_8U )
            {
#ifdef HAVE_TEGRA_OPTIMIZATION
                if(tegra::useTegra() && tegra::cvtGray2RGB(src, dst))
                    break;
#endif
                CvtColorLoop(src, dst, Gray2RGB<uchar>(dcn));
            }
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, Gray2RGB<ushort>(dcn));
            else
                CvtColorLoop(src, dst, Gray2RGB<float>(dcn));
            break;
7678
        // Gray -> packed 16-bit RGB (565 or 555).
        case CV_GRAY2BGR565: case CV_GRAY2BGR555:
            CV_Assert( scn == 1 && depth == CV_8U );
            _dst.create(sz, CV_8UC2);
            dst = _dst.getMat();

            CvtColorLoop(src, dst, Gray2RGB5x5(code == CV_GRAY2BGR565 ? 6 : 5));
            break;
7686
        // BGR/RGB(A) -> YCrCb or YUV. For YCrCb the default coefficient
        // tables inside RGB2YCrCb_* are used (coeffs pointers stay null);
        // for YUV the explicit yuv_f/yuv_i coefficients are passed.
        case CV_BGR2YCrCb: case CV_RGB2YCrCb:
        case CV_BGR2YUV: case CV_RGB2YUV:
            {
            CV_Assert( scn == 3 || scn == 4 );
            bidx = code == CV_BGR2YCrCb || code == CV_BGR2YUV ? 0 : 2;
            // YUV coefficients: B,G,R luma weights plus the two chroma scales.
            static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
            // Fixed-point equivalents for the integer paths.
            static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };
            const float* coeffs_f = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_f;
            const int* coeffs_i = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_i;

            _dst.create(sz, CV_MAKETYPE(depth, 3));
            dst = _dst.getMat();

// NOTE: this IPP section is deliberately compiled out ("&& 0").
#if defined HAVE_IPP && 0
            CV_IPP_CHECK()
            {
                if (code == CV_RGB2YUV && scn == 3 && depth == CV_8U)
                {
                    if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiRGBToYUV_8u_C3R)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_BGR2YUV && scn == 3 && depth == CV_8U)
                {
                    if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
                                                                           (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_RGB2YUV && scn == 4 && depth == CV_8U)
                {
                    if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
                                                                           (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 0, 1, 2, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_BGR2YUV && scn == 4 && depth == CV_8U)
                {
                    if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
                                                                           (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
            }
#endif

            if( depth == CV_8U )
            {
#ifdef HAVE_TEGRA_OPTIMIZATION
                if((code == CV_RGB2YCrCb || code == CV_BGR2YCrCb) && tegra::useTegra() && tegra::cvtRGB2YCrCb(src, dst, bidx))
                    break;
#endif
                CvtColorLoop(src, dst, RGB2YCrCb_i<uchar>(scn, bidx, coeffs_i));
            }
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, RGB2YCrCb_i<ushort>(scn, bidx, coeffs_i));
            else
                CvtColorLoop(src, dst, RGB2YCrCb_f<float>(scn, bidx, coeffs_f));
            }
            break;
7759
        // YCrCb or YUV -> BGR/RGB(A); inverse of the case above. Null coeff
        // pointers select the default YCrCb tables inside YCrCb2RGB_*.
        case CV_YCrCb2BGR: case CV_YCrCb2RGB:
        case CV_YUV2BGR: case CV_YUV2RGB:
            {
            if( dcn <= 0 ) dcn = 3;
            CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
            bidx = code == CV_YCrCb2BGR || code == CV_YUV2BGR ? 0 : 2;
            // Inverse-transform coefficients for the YUV path.
            static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };
            static const int yuv_i[] = { 33292, -6472, -9519, 18678 };
            const float* coeffs_f = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_f;
            const int* coeffs_i = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_i;

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

// NOTE: this IPP section is deliberately compiled out ("&& 0").
#if defined HAVE_IPP && 0
            CV_IPP_CHECK()
            {
                if (code == CV_YUV2RGB && dcn == 3 && depth == CV_8U)
                {
                    if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_YUV2BGR && dcn == 3 && depth == CV_8U)
                {
                    if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R,
                                                                           ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_YUV2RGB && dcn == 4 && depth == CV_8U)
                {
                    if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R,
                                                                           ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_YUV2BGR && dcn == 4 && depth == CV_8U)
                {
                    if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R,
                                                                           ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
            }
#endif

            if( depth == CV_8U )
                CvtColorLoop(src, dst, YCrCb2RGB_i<uchar>(dcn, bidx, coeffs_i));
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, YCrCb2RGB_i<ushort>(dcn, bidx, coeffs_i));
            else
                CvtColorLoop(src, dst, YCrCb2RGB_f<float>(dcn, bidx, coeffs_f));
            }
            break;
7827
        // BGR/RGB(A) -> CIE XYZ (3-channel output, default coefficient tables).
        case CV_BGR2XYZ: case CV_RGB2XYZ:
            CV_Assert( scn == 3 || scn == 4 );
            bidx = code == CV_BGR2XYZ ? 0 : 2;

            _dst.create(sz, CV_MAKETYPE(depth, 3));
            dst = _dst.getMat();

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
            CV_IPP_CHECK()
            {
                // IPP paths cover integer depths only (depth != CV_32F).
                if( code == CV_BGR2XYZ && scn == 3 && depth != CV_32F )
                {
                    if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if( code == CV_BGR2XYZ && scn == 4 && depth != CV_32F )
                {
                    if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if( code == CV_RGB2XYZ && scn == 3 && depth != CV_32F )
                {
                    if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2XYZTab[depth])) )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if( code == CV_RGB2XYZ && scn == 4 && depth != CV_32F )
                {
                    if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 0, 1, 2, depth)) )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
            }
#endif

            if( depth == CV_8U )
                CvtColorLoop(src, dst, RGB2XYZ_i<uchar>(scn, bidx, 0));
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, RGB2XYZ_i<ushort>(scn, bidx, 0));
            else
                CvtColorLoop(src, dst, RGB2XYZ_f<float>(scn, bidx, 0));
            break;
7884
7885 case CV_XYZ2BGR: case CV_XYZ2RGB:
7886 if( dcn <= 0 ) dcn = 3;
7887 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
7888 bidx = code == CV_XYZ2BGR ? 0 : 2;
7889
7890 _dst.create(sz, CV_MAKETYPE(depth, dcn));
7891 dst = _dst.getMat();
7892
7893 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
7894 CV_IPP_CHECK()
7895 {
7896 if( code == CV_XYZ2BGR && dcn == 3 && depth != CV_32F )
7897 {
7898 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
7899 {
7900 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7901 return;
7902 }
7903 setIppErrorStatus();
7904 }
7905 else if( code == CV_XYZ2BGR && dcn == 4 && depth != CV_32F )
7906 {
7907 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
7908 {
7909 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7910 return;
7911 }
7912 setIppErrorStatus();
7913 }
7914 if( code == CV_XYZ2RGB && dcn == 3 && depth != CV_32F )
7915 {
7916 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiXYZ2RGBTab[depth])) )
7917 {
7918 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7919 return;
7920 }
7921 setIppErrorStatus();
7922 }
7923 else if( code == CV_XYZ2RGB && dcn == 4 && depth != CV_32F )
7924 {
7925 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
7926 {
7927 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
7928 return;
7929 }
7930 setIppErrorStatus();
7931 }
7932 }
7933 #endif
7934
7935 if( depth == CV_8U )
7936 CvtColorLoop(src, dst, XYZ2RGB_i<uchar>(dcn, bidx, 0));
7937 else if( depth == CV_16U )
7938 CvtColorLoop(src, dst, XYZ2RGB_i<ushort>(dcn, bidx, 0));
7939 else
7940 CvtColorLoop(src, dst, XYZ2RGB_f<float>(dcn, bidx, 0));
7941 break;
7942
        // BGR/RGB(A) -> HSV or HLS. The *_FULL codes map hue to the full
        // 0..255 range for 8U; otherwise hue is scaled to 0..180 (8U) or
        // 0..360 (32F).
        case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
        case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:
            {
            CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
            bidx = code == CV_BGR2HSV || code == CV_BGR2HLS ||
                code == CV_BGR2HSV_FULL || code == CV_BGR2HLS_FULL ? 0 : 2;
            int hrange = depth == CV_32F ? 360 : code == CV_BGR2HSV || code == CV_RGB2HSV ||
                code == CV_BGR2HLS || code == CV_RGB2HLS ? 180 : 256;

            _dst.create(sz, CV_MAKETYPE(depth, 3));
            dst = _dst.getMat();

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
            CV_IPP_CHECK()
            {
                if( depth == CV_8U || depth == CV_16U )
                {
#if 0 // breaks OCL accuracy tests
                    if( code == CV_BGR2HSV_FULL && scn == 3 )
                    {
                        if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_BGR2HSV_FULL && scn == 4 )
                    {
                        if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_RGB2HSV_FULL && scn == 4 )
                    {
                        if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 0, 1, 2, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    } else
#endif
                    // Note: the HSV path below is restricted to CV_16U; the
                    // 8U HSV variants above are disabled (OCL accuracy).
                    if( code == CV_RGB2HSV_FULL && scn == 3 && depth == CV_16U )
                    {
                        if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HSVTab[depth])) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_BGR2HLS_FULL && scn == 3 )
                    {
                        if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_BGR2HLS_FULL && scn == 4 )
                    {
                        if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_RGB2HLS_FULL && scn == 3 )
                    {
                        if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HLSTab[depth])) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_RGB2HLS_FULL && scn == 4 )
                    {
                        if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 0, 1, 2, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                }
            }
#endif

            if( code == CV_BGR2HSV || code == CV_RGB2HSV ||
                code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL )
            {
#ifdef HAVE_TEGRA_OPTIMIZATION
                if(tegra::useTegra() && tegra::cvtRGB2HSV(src, dst, bidx, hrange))
                    break;
#endif
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, RGB2HSV_b(scn, bidx, hrange));
                else
                    CvtColorLoop(src, dst, RGB2HSV_f(scn, bidx, (float)hrange));
            }
            else
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, RGB2HLS_b(scn, bidx, hrange));
                else
                    CvtColorLoop(src, dst, RGB2HLS_f(scn, bidx, (float)hrange));
            }
            }
            break;
8059
        // HSV or HLS -> BGR/RGB(A); inverse of the case above.
        // NOTE(review): the non-FULL 8U hue range here is 255, while the
        // forward conversion uses 256 — kept as-is to preserve behavior.
        case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
        case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL:
            {
            if( dcn <= 0 ) dcn = 3;
            CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
            bidx = code == CV_HSV2BGR || code == CV_HLS2BGR ||
                code == CV_HSV2BGR_FULL || code == CV_HLS2BGR_FULL ? 0 : 2;
            int hrange = depth == CV_32F ? 360 : code == CV_HSV2BGR || code == CV_HSV2RGB ||
                code == CV_HLS2BGR || code == CV_HLS2RGB ? 180 : 255;

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
            CV_IPP_CHECK()
            {
                if( depth == CV_8U || depth == CV_16U )
                {
                    if( code == CV_HSV2BGR_FULL && dcn == 3 )
                    {
                        if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_HSV2BGR_FULL && dcn == 4 )
                    {
                        if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_HSV2RGB_FULL && dcn == 3 )
                    {
                        if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHSV2RGBTab[depth])) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_HSV2RGB_FULL && dcn == 4 )
                    {
                        if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_HLS2BGR_FULL && dcn == 3 )
                    {
                        if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_HLS2BGR_FULL && dcn == 4 )
                    {
                        if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_HLS2RGB_FULL && dcn == 3 )
                    {
                        if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHLS2RGBTab[depth])) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                    else if( code == CV_HLS2RGB_FULL && dcn == 4 )
                    {
                        if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                            return;
                        }
                        setIppErrorStatus();
                    }
                }
            }
#endif

            if( code == CV_HSV2BGR || code == CV_HSV2RGB ||
                code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL )
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, HSV2RGB_b(dcn, bidx, hrange));
                else
                    CvtColorLoop(src, dst, HSV2RGB_f(dcn, bidx, (float)hrange));
            }
            else
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, HLS2RGB_b(dcn, bidx, hrange));
                else
                    CvtColorLoop(src, dst, HLS2RGB_f(dcn, bidx, (float)hrange));
            }
            }
            break;
8171
        // BGR/RGB(A) -> Lab or Luv. The L-prefixed codes (LBGR/LRGB) treat
        // the input as linear RGB; the plain codes apply the sRGB gamma
        // (srgb flag below).
        case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab:
        case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv:
            {
            CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
            bidx = code == CV_BGR2Lab || code == CV_BGR2Luv ||
                   code == CV_LBGR2Lab || code == CV_LBGR2Luv ? 0 : 2;
            bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab ||
                        code == CV_BGR2Luv || code == CV_RGB2Luv;

            _dst.create(sz, CV_MAKETYPE(depth, 3));
            dst = _dst.getMat();

// NOTE: this IPP section is deliberately compiled out ("&& 0").
#if defined HAVE_IPP && 0
            CV_IPP_CHECK()
            {
                if (code == CV_LBGR2Lab && scn == 3 && depth == CV_8U)
                {
                    if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGRToLab_8u_C3R)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_LBGR2Lab && scn == 4 && depth == CV_8U)
                {
                    if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
                                                                           (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 0, 1, 2, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else
                if (code == CV_LRGB2Lab && scn == 3 && depth == CV_8U) // slower than OpenCV
                {
                    if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
                                                                           (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 2, 1, 0, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_LRGB2Lab && scn == 4 && depth == CV_8U) // slower than OpenCV
                {
                    if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
                                                                           (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 2, 1, 0, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_LRGB2Luv && scn == 3)
                {
                    if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGBToLUVTab[depth])))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_LRGB2Luv && scn == 4)
                {
                    if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
                                                                           ippiRGBToLUVTab[depth], 0, 1, 2, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_LBGR2Luv && scn == 3)
                {
                    if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth],
                                                                           ippiRGBToLUVTab[depth], 2, 1, 0, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
                else if (code == CV_LBGR2Luv && scn == 4)
                {
                    if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth],
                                                                           ippiRGBToLUVTab[depth], 2, 1, 0, depth)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
            }
#endif

            if( code == CV_BGR2Lab || code == CV_RGB2Lab ||
                code == CV_LBGR2Lab || code == CV_LRGB2Lab )
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, RGB2Lab_b(scn, bidx, 0, 0, srgb));
                else
                    CvtColorLoop(src, dst, RGB2Lab_f(scn, bidx, 0, 0, srgb));
            }
            else
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, RGB2Luv_b(scn, bidx, 0, 0, srgb));
                else
                    CvtColorLoop(src, dst, RGB2Luv_f(scn, bidx, 0, 0, srgb));
            }
            }
            break;
8286
8287 case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB:
8288 case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB:
8289 {
8290 if( dcn <= 0 ) dcn = 3;
8291 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
8292 bidx = code == CV_Lab2BGR || code == CV_Luv2BGR ||
8293 code == CV_Lab2LBGR || code == CV_Luv2LBGR ? 0 : 2;
8294 bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB ||
8295 code == CV_Luv2BGR || code == CV_Luv2RGB;
8296
8297 _dst.create(sz, CV_MAKETYPE(depth, dcn));
8298 dst = _dst.getMat();
8299
8300 #if defined HAVE_IPP && 0
8301 CV_IPP_CHECK()
8302 {
8303 if( code == CV_Lab2LBGR && dcn == 3 && depth == CV_8U)
8304 {
8305 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R)) )
8306 {
8307 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
8308 return;
8309 }
8310 setIppErrorStatus();
8311 }
8312 else if( code == CV_Lab2LBGR && dcn == 4 && depth == CV_8U )
8313 {
8314 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R,
8315 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
8316 {
8317 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
8318 return;
8319 }
8320 setIppErrorStatus();
8321 }
8322 if( code == CV_Lab2LRGB && dcn == 3 && depth == CV_8U )
8323 {
8324 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R,
8325 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
8326 {
8327 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
8328 return;
8329 }
8330 setIppErrorStatus();
8331 }
8332 else if( code == CV_Lab2LRGB && dcn == 4 && depth == CV_8U )
8333 {
8334 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R,
8335 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
8336 {
8337 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
8338 return;
8339 }
8340 setIppErrorStatus();
8341 }
8342 if( code == CV_Luv2LRGB && dcn == 3 )
8343 {
8344 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiLUVToRGBTab[depth])) )
8345 return;
8346 }
8347 else if( code == CV_Luv2LRGB && dcn == 4 )
8348 {
8349 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
8350 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
8351 {
8352 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
8353 return;
8354 }
8355 }
8356 if( code == CV_Luv2LBGR && dcn == 3 )
8357 {
8358 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
8359 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
8360 {
8361 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
8362 return;
8363 }
8364 }
8365 else if( code == CV_Luv2LBGR && dcn == 4 )
8366 {
8367 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
8368 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
8369 {
8370 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
8371 return;
8372 }
8373 }
8374 }
8375 #endif
8376
8377 if( code == CV_Lab2BGR || code == CV_Lab2RGB ||
8378 code == CV_Lab2LBGR || code == CV_Lab2LRGB )
8379 {
8380 if( depth == CV_8U )
8381 CvtColorLoop(src, dst, Lab2RGB_b(dcn, bidx, 0, 0, srgb));
8382 else
8383 CvtColorLoop(src, dst, Lab2RGB_f(dcn, bidx, 0, 0, srgb));
8384 }
8385 else
8386 {
8387 if( depth == CV_8U )
8388 CvtColorLoop(src, dst, Luv2RGB_b(dcn, bidx, 0, 0, srgb));
8389 else
8390 CvtColorLoop(src, dst, Luv2RGB_f(dcn, bidx, 0, 0, srgb));
8391 }
8392 }
8393 break;
8394
        // Bayer mosaic -> gray/BGR (bilinear, VNG or edge-aware); delegated
        // to the demosaicing() implementation.
        case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:
        case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:
        case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG:
        case CV_BayerBG2BGR_EA: case CV_BayerGB2BGR_EA: case CV_BayerRG2BGR_EA: case CV_BayerGR2BGR_EA:
            demosaicing(src, _dst, code, dcn);
            break;
8401
        // Semi-planar YUV 4:2:0 (NV21/NV12) -> BGR/RGB(A).
        case CV_YUV2BGR_NV21: case CV_YUV2RGB_NV21: case CV_YUV2BGR_NV12: case CV_YUV2RGB_NV12:
        case CV_YUV2BGRA_NV21: case CV_YUV2RGBA_NV21: case CV_YUV2BGRA_NV12: case CV_YUV2RGBA_NV12:
            {
            // http://www.fourcc.org/yuv.php#NV21 == yuv420sp -> a plane of 8 bit Y samples followed by an interleaved V/U plane containing 8 bit 2x2 subsampled chroma samples
            // http://www.fourcc.org/yuv.php#NV12 -> a plane of 8 bit Y samples followed by an interleaved U/V plane containing 8 bit 2x2 subsampled colour difference samples

            if (dcn <= 0) dcn = (code==CV_YUV420sp2BGRA || code==CV_YUV420sp2RGBA || code==CV_YUV2BGRA_NV12 || code==CV_YUV2RGBA_NV12) ? 4 : 3;
            const int bIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2BGR_NV12 || code==CV_YUV2BGRA_NV12) ? 0 : 2;
            // uIdx == 1 for NV21 (V first), 0 for NV12 (U first).
            const int uIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2RGB_NV21 || code==CV_YUV2RGBA_NV21) ? 1 : 0;

            CV_Assert( dcn == 3 || dcn == 4 );
            // Source holds Y plane plus half-height chroma: total height is 3/2 of the image.
            CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );

            Size dstSz(sz.width, sz.height * 2 / 3);
            _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            int srcstep = (int)src.step;
            const uchar* y = src.ptr();
            // Interleaved chroma plane starts right after the Y plane.
            const uchar* uv = y + srcstep * dstSz.height;

            // Dispatch on (dcn, bIdx, uIdx) packed into one integer.
            switch(dcn*100 + bIdx * 10 + uIdx)
            {
            case 300: cvtYUV420sp2RGB<0, 0> (dst, srcstep, y, uv); break;
            case 301: cvtYUV420sp2RGB<0, 1> (dst, srcstep, y, uv); break;
            case 320: cvtYUV420sp2RGB<2, 0> (dst, srcstep, y, uv); break;
            case 321: cvtYUV420sp2RGB<2, 1> (dst, srcstep, y, uv); break;
            case 400: cvtYUV420sp2RGBA<0, 0>(dst, srcstep, y, uv); break;
            case 401: cvtYUV420sp2RGBA<0, 1>(dst, srcstep, y, uv); break;
            case 420: cvtYUV420sp2RGBA<2, 0>(dst, srcstep, y, uv); break;
            case 421: cvtYUV420sp2RGBA<2, 1>(dst, srcstep, y, uv); break;
            default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
            };
            }
            break;
        // Planar YUV 4:2:0 (YV12/I420) -> BGR/RGB(A).
        case CV_YUV2BGR_YV12: case CV_YUV2RGB_YV12: case CV_YUV2BGRA_YV12: case CV_YUV2RGBA_YV12:
        case CV_YUV2BGR_IYUV: case CV_YUV2RGB_IYUV: case CV_YUV2BGRA_IYUV: case CV_YUV2RGBA_IYUV:
            {
            //http://www.fourcc.org/yuv.php#YV12 == yuv420p -> It comprises an NxM Y plane followed by (N/2)x(M/2) V and U planes.
            //http://www.fourcc.org/yuv.php#IYUV == I420 -> It comprises an NxN Y plane followed by (N/2)x(N/2) U and V planes

            if (dcn <= 0) dcn = (code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12 || code==CV_YUV2RGBA_IYUV || code==CV_YUV2BGRA_IYUV) ? 4 : 3;
            const int bIdx = (code==CV_YUV2BGR_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2BGR_IYUV || code==CV_YUV2BGRA_IYUV) ? 0 : 2;
            // uIdx == 1 for YV12 (V plane first), 0 for IYUV/I420 (U plane first).
            const int uIdx  = (code==CV_YUV2BGR_YV12 || code==CV_YUV2RGB_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12) ? 1 : 0;

            CV_Assert( dcn == 3 || dcn == 4 );
            CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );

            Size dstSz(sz.width, sz.height * 2 / 3);
            _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            int srcstep = (int)src.step;
            const uchar* y = src.ptr();
            const uchar* u = y + srcstep * dstSz.height;
            // The second chroma plane starts half a plane later; the extra
            // term handles heights that are not multiples of 4.
            const uchar* v = y + srcstep * (dstSz.height + dstSz.height/4) + (dstSz.width/2) * ((dstSz.height % 4)/2);

            int ustepIdx = 0;
            int vstepIdx = dstSz.height % 4 == 2 ? 1 : 0;

            // YV12 stores V before U: swap the plane pointers and step parity.
            if(uIdx == 1) { std::swap(u ,v), std::swap(ustepIdx, vstepIdx); }

            switch(dcn*10 + bIdx)
            {
            case 30: cvtYUV420p2RGB<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
            case 32: cvtYUV420p2RGB<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
            case 40: cvtYUV420p2RGBA<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
            case 42: cvtYUV420p2RGBA<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
            default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
            };
            }
            break;
        // YUV 4:2:0 -> gray: simply copy out the Y plane (top 2/3 of source).
        case CV_YUV2GRAY_420:
            {
            if (dcn <= 0) dcn = 1;

            CV_Assert( dcn == 1 );
            CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );

            Size dstSz(sz.width, sz.height * 2 / 3);
            _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();
#if defined HAVE_IPP
            CV_IPP_CHECK()
            {
                // Plain plane copy via IPP when available.
                if (ippStsNoErr == ippiCopy_8u_C1R(src.data, (int)src.step, dst.data, (int)dst.step,
                                                   ippiSize(dstSz.width, dstSz.height)))
                {
                    CV_IMPL_ADD(CV_IMPL_IPP);
                    return;
                }
                setIppErrorStatus();
            }
#endif
            // Fallback: copy the Y-plane rows with cv::Mat.
            src(Range(0, dstSz.height), Range::all()).copyTo(dst);
            }
            break;
    case CV_RGB2YUV_YV12: case CV_BGR2YUV_YV12: case CV_RGBA2YUV_YV12: case CV_BGRA2YUV_YV12:
    case CV_RGB2YUV_IYUV: case CV_BGR2YUV_IYUV: case CV_RGBA2YUV_IYUV: case CV_BGRA2YUV_IYUV:
        {
            // BGR/RGB(A) -> planar YUV 4:2:0 (YV12 or IYUV/I420), single-channel output.
            if (dcn <= 0) dcn = 1;
            // bIdx: byte position of blue in the input pixel (0 => BGR order, 2 => RGB).
            const int bIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_BGR2YUV_YV12 || code == CV_BGRA2YUV_YV12) ? 0 : 2;
            // uIdx template selector: 1 => IYUV (U plane first), 2 => YV12 (V plane first).
            const int uIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_RGB2YUV_IYUV || code == CV_RGBA2YUV_IYUV) ? 1 : 2;

            CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
            CV_Assert( dcn == 1 );
            // Even dimensions required for the 2x2 chroma subsampling.
            CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 );

            // Output plane: full-height luma plus half-height packed chroma => 3/2 * height.
            Size dstSz(sz.width, sz.height / 2 * 3);
            _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            // Dispatch on (plane order, blue position) to the templated converter.
            switch(bIdx + uIdx*10)
            {
                case 10: cvtRGBtoYUV420p<0, 1>(src, dst); break;
                case 12: cvtRGBtoYUV420p<2, 1>(src, dst); break;
                case 20: cvtRGBtoYUV420p<0, 2>(src, dst); break;
                case 22: cvtRGBtoYUV420p<2, 2>(src, dst); break;
                default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
            };
        }
        break;
    case CV_YUV2RGB_UYVY: case CV_YUV2BGR_UYVY: case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY:
    case CV_YUV2RGB_YUY2: case CV_YUV2BGR_YUY2: case CV_YUV2RGB_YVYU: case CV_YUV2BGR_YVYU:
    case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2: case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU:
        {
            // Packed (interleaved) YUV 4:2:2 -> BGR/RGB(A). Byte layouts:
            //http://www.fourcc.org/yuv.php#UYVY
            //http://www.fourcc.org/yuv.php#YUY2
            //http://www.fourcc.org/yuv.php#YVYU

            // Default channel count: 4 for the *A (alpha) codes, otherwise 3.
            if (dcn <= 0) dcn = (code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2RGBA_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 4 : 3;
            // bIdx: byte position of blue in the output pixel (0 => BGR, 2 => RGB).
            const int bIdx = (code==CV_YUV2BGR_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2BGR_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2BGR_YVYU || code==CV_YUV2BGRA_YVYU) ? 0 : 2;
            // ycn: offset of the first luma byte in each 2-pixel group (1 for UYVY,
            // 0 for YUY2/YVYU where luma comes first).
            const int ycn  = (code==CV_YUV2RGB_UYVY || code==CV_YUV2BGR_UYVY || code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY) ? 1 : 0;
            // uIdx: 1 when V precedes U within the group (YVYU), else 0.
            const int uIdx = (code==CV_YUV2RGB_YVYU || code==CV_YUV2BGR_YVYU || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 1 : 0;

            CV_Assert( dcn == 3 || dcn == 4 );
            // Source is a 2-channel 8-bit image: 4 bytes encode 2 pixels.
            CV_Assert( scn == 2 && depth == CV_8U );

            _dst.create(sz, CV_8UC(dcn));
            dst = _dst.getMat();

            // Dispatch on all four template parameters packed into one integer:
            // thousands = dcn, hundreds = bIdx, tens = uIdx, units = ycn.
            switch(dcn*1000 + bIdx*100 + uIdx*10 + ycn)
            {
                case 3000: cvtYUV422toRGB<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3001: cvtYUV422toRGB<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3010: cvtYUV422toRGB<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3011: cvtYUV422toRGB<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3200: cvtYUV422toRGB<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3201: cvtYUV422toRGB<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3210: cvtYUV422toRGB<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3211: cvtYUV422toRGB<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4000: cvtYUV422toRGBA<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4001: cvtYUV422toRGBA<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4010: cvtYUV422toRGBA<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4011: cvtYUV422toRGBA<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4200: cvtYUV422toRGBA<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4201: cvtYUV422toRGBA<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4210: cvtYUV422toRGBA<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4211: cvtYUV422toRGBA<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
            };
        }
        break;
    case CV_YUV2GRAY_UYVY: case CV_YUV2GRAY_YUY2:
        {
            // Grayscale from packed 4:2:2: pick out the luma bytes, which sit in
            // channel 1 of the 2-channel source for UYVY and channel 0 for YUY2.
            if (dcn <= 0) dcn = 1;

            CV_Assert( dcn == 1 );
            CV_Assert( scn == 2 && depth == CV_8U );

            extractChannel(_src, _dst, code == CV_YUV2GRAY_UYVY ? 1 : 0);
        }
        break;
    case CV_RGBA2mRGBA:
        {
            // RGBA -> alpha-premultiplied RGBA (8-bit only). Both source and
            // destination are 4-channel.
            if (dcn <= 0) dcn = 4;
            CV_Assert( scn == 4 && dcn == 4 );

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( depth == CV_8U )
            {
#if defined(HAVE_IPP)
                CV_IPP_CHECK()
                {
                    // Fast path: IPP alpha premultiplication; fall back to the
                    // generic functor loop below on failure.
                    if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiAlphaPremul_8u_AC4R)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
#endif
                CvtColorLoop(src, dst, RGBA2mRGBA<uchar>());
            }
            else
            {
                CV_Error( CV_StsBadArg, "Unsupported image depth" );
            }
        }
        break;
    case CV_mRGBA2RGBA:
        {
            // Inverse of CV_RGBA2mRGBA: divide color channels back out by alpha
            // (8-bit only). Both source and destination are 4-channel.
            if (dcn <= 0) dcn = 4;
            CV_Assert( scn == 4 && dcn == 4 );

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( depth == CV_8U )
                CvtColorLoop(src, dst, mRGBA2RGBA<uchar>());
            else
            {
                CV_Error( CV_StsBadArg, "Unsupported image depth" );
            }
        }
        break;
    default:
        // No case matched the conversion code: report it as a bad flag.
        CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
    }
}
8624
8625 CV_IMPL void
cvCvtColor(const CvArr * srcarr,CvArr * dstarr,int code)8626 cvCvtColor( const CvArr* srcarr, CvArr* dstarr, int code )
8627 {
8628 cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
8629 CV_Assert( src.depth() == dst.depth() );
8630
8631 cv::cvtColor(src, dst, code, dst.channels());
8632 CV_Assert( dst.data == dst0.data );
8633 }
8634
8635
8636 /* End of file. */
8637