1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
/*
 * NV12ToARGB color space conversion CUDA kernel
 *
 * This file uses CUDA to convert an NV12 source (YUV 4:2:0 semi-planar:
 * a full-resolution Y plane followed by a plane of interleaved Cb/Cr samples)
 * into output in ARGB format.
 */
49 
50 #include "opencv2/opencv_modules.hpp"
51 
52 #ifndef HAVE_OPENCV_CUDEV
53 
54 #error "opencv_cudev is required"
55 
56 #else
57 
58 #include "opencv2/cudev/common.hpp"
59 
60 using namespace cv;
61 using namespace cv::cudev;
62 
63 void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height);
64 
namespace
{
    // Working precision of the conversion: 8-bit source samples are widened to
    // COLOR_COMPONENT_BIT_SIZE bits before the color matrix is applied.
    #define COLOR_COMPONENT_BIT_SIZE 10
    #define COLOR_COMPONENT_MASK     0x3FF

    // Row-major 3x3 matrix applied to (Y, Cb - 512, Cr - 512);
    // rows 0, 1 and 2 produce red, green and blue respectively.
    __constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f};

    // Converts one 10-bit YCbCr triple to floating-point RGB.
    //   yuvi            - three consecutive values: luma, Cb, Cr
    //   red, green, blue - outputs; values are NOT clamped here
    // NOTE(review): luma is used as-is (no black-level offset is subtracted) - confirm this is intended.
    __device__ static void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
    {
        // Center the chroma samples around zero (512 is the midpoint of the 10-bit range).
        const float luma     = (float)yuvi[0];
        const float chromaCb = (float)(int)yuvi[1] - 512.0f;
        const float chromaCr = (float)(int)yuvi[2] - 512.0f;

        *red   = (luma     * constHueColorSpaceMat[0]) +
                 (chromaCb * constHueColorSpaceMat[1]) +
                 (chromaCr * constHueColorSpaceMat[2]);

        *green = (luma     * constHueColorSpaceMat[3]) +
                 (chromaCb * constHueColorSpaceMat[4]) +
                 (chromaCr * constHueColorSpaceMat[5]);

        *blue  = (luma     * constHueColorSpaceMat[6]) +
                 (chromaCb * constHueColorSpaceMat[7]) +
                 (chromaCr * constHueColorSpaceMat[8]);
    }

    // Clamps 10-bit floating-point RGB to [0, 1023], reduces each channel to 8 bits and
    // packs them into one 32-bit word: blue in bits 0-7, green in bits 8-15, red in bits 16-23.
    // `alpha` is OR-ed in unchanged, so the caller must pass it already shifted into place.
    __device__ static uint RGBA_pack_10bit(float red, float green, float blue, uint alpha)
    {
        const float maxValue = (float)COLOR_COMPONENT_MASK;   // 1023.f
        const uint  toEightBits = COLOR_COMPONENT_BIT_SIZE - 8;

        red   = ::fminf(::fmaxf(red,   0.0f), maxValue);
        green = ::fminf(::fmaxf(green, 0.0f), maxValue);
        blue  = ::fminf(::fmaxf(blue,  0.0f), maxValue);

        return  ((uint)blue  >> toEightBits)        |
               (((uint)green >> toEightBits) << 8)  |
               (((uint)red   >> toEightBits) << 16) |
               alpha;
    }

    // CUDA kernel for outputting the final ARGB image from NV12.
    //
    // Launch layout: 2D grid of 2D blocks; every thread converts two horizontally adjacent
    // pixels (x, x + 1), so the grid must cover ceil(width / 2) threads in x and `height` in y.
    // No shared memory is used.
    //
    //   srcImage     - luma rows first; chroma rows (interleaved Cb, Cr bytes) start at
    //                  byte offset nSourcePitch * height
    //   nSourcePitch - source row stride in bytes
    //   dstImage     - destination, one 32-bit word per pixel
    //   nDestPitch   - destination row stride in bytes
    //   width/height - image size in pixels
    //
    // An odd `width` is handled: the unpaired last column is converted on its own and nothing
    // is written past the end of the destination row. The Cr byte of that last column is still
    // read from source column `width`, which assumes nSourcePitch > width in that case.
    __global__ void NV12_to_RGB(const uchar* srcImage, size_t nSourcePitch,
                                  uint* dstImage, size_t nDestPitch,
                                  uint width, uint height)
    {
        // We multiply by 2 because we process 2 pixels per thread
        const uint x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
        const uint y = blockIdx.y *  blockDim.y       +  threadIdx.y;

        if (x >= width || y >= height)
            return;

        // False only for the last column of an odd-width image.
        const bool hasSecondPixel = (x + 1 < width);

        // Shift that widens an 8-bit sample to the 10-bit working range.
        const uint widen = COLOR_COMPONENT_BIT_SIZE - 8;

        // Chroma is decimated 2x horizontally, so both pixels of the pair share one (Cb, Cr) sample.
        const uint   y_chroma     = y >> 1;
        const size_t chromaOffset = nSourcePitch * height;
        const uchar* chromaRow    = srcImage + chromaOffset + (size_t)y_chroma * nSourcePitch;

        uint chromaCb = chromaRow[x    ];
        uint chromaCr = chromaRow[x + 1];

        // Odd scanlines sit between two chroma rows: average with the next chroma row
        // (rounding up), unless this is already the last one.
        if ((y & 1) && (y_chroma + 1 < (height >> 1)))
        {
            const uchar* nextChromaRow = chromaRow + nSourcePitch;

            chromaCb = (chromaCb + nextChromaRow[x    ] + 1) >> 1;
            chromaCr = (chromaCr + nextChromaRow[x + 1] + 1) >> 1;
        }

        const uchar* lumaRow = srcImage + (size_t)y * nSourcePitch;

        // yuvi[0..2] describe pixel x, yuvi[3..5] describe pixel x + 1.
        uint yuvi[6];

        yuvi[0] = (uint)lumaRow[x] << widen;
        yuvi[1] = chromaCb << widen;
        yuvi[2] = chromaCr << widen;

        const uint opaqueAlpha = ((uint)0xff << 24);

        // nDestPitch is in bytes; destination elements are 4 bytes wide.
        uint* dstRow = dstImage + (size_t)y * (nDestPitch >> 2);

        float red, green, blue;

        YUV2RGB(&yuvi[0], &red, &green, &blue);
        dstRow[x] = RGBA_pack_10bit(red, green, blue, opaqueAlpha);

        if (hasSecondPixel)
        {
            yuvi[3] = (uint)lumaRow[x + 1] << widen;
            yuvi[4] = yuvi[1];
            yuvi[5] = yuvi[2];

            YUV2RGB(&yuvi[3], &red, &green, &blue);
            dstRow[x + 1] = RGBA_pack_10bit(red, green, blue, opaqueAlpha);
        }
    }
}
188 
// Final stage of video decoding: converts an NV12 frame to a 4-channel 8-bit image.
//
//   decodedFrame - NV12 data in device memory; its data pointer and row step are passed to the kernel
//   _outFrame    - receives a height x width CV_8UC4 GpuMat (allocated here via create())
//   width/height - size of the output image in pixels
//
// The kernel is launched on the default stream and the function blocks until the device
// has finished, so the output is ready on return. Launch and execution errors are reported
// through CV_CUDEV_SAFE_CALL.
void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height)
{
    _outFrame.create(height, width, CV_8UC4);

    // Nothing to convert. A grid with a zero dimension would be rejected by the runtime
    // as an invalid launch configuration, so bail out before launching.
    if (width <= 0 || height <= 0)
        return;

    GpuMat outFrame = _outFrame.getGpuMat();

    // Each thread converts two horizontally adjacent pixels, hence the factor of 2 in x.
    const int blockWidth      = 32;
    const int blockHeight     = 8;
    const int pixelsPerThread = 2;

    const dim3 block(blockWidth, blockHeight);
    const dim3 grid(divUp(width, pixelsPerThread * blockWidth), divUp(height, blockHeight));

    NV12_to_RGB<<<grid, block>>>(decodedFrame.ptr<uchar>(), decodedFrame.step,
                                 outFrame.ptr<uint>(), outFrame.step,
                                 width, height);

    CV_CUDEV_SAFE_CALL( cudaGetLastError() );       // launch-configuration errors
    CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );  // errors raised while the kernel ran
}
206 
207 #endif
208