1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 /*
44 * NV12ToARGB color space conversion CUDA kernel
45 *
 * This file uses CUDA to convert an NV12 source frame (YUV 4:2:0: a luma plane
 * followed by a plane of interleaved Cb/Cr samples) to output in ARGB format
48 */
49
50 #include "opencv2/opencv_modules.hpp"
51
52 #ifndef HAVE_OPENCV_CUDEV
53
54 #error "opencv_cudev is required"
55
56 #else
57
58 #include "opencv2/cudev/common.hpp"
59
60 using namespace cv;
61 using namespace cv::cudev;
62
63 void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height);
64
65 namespace
66 {
// Row-major 3x3 matrix applied by YUV2RGB to (luma, Cb - 512, Cr - 512):
// entries 0-2 produce red, 3-5 produce green and 6-8 produce blue.
__constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f};
68
// Converts one YUV triple to floating-point RGB using constHueColorSpaceMat.
//
// yuvi             - three consecutive values: luma, Cb, Cr. Both chroma values
//                    are re-centred by subtracting 512 before the matrix is applied.
// red, green, blue - receive the unclamped results (clamping is left to the caller).
//
// NOTE(review): the coefficients look like a limited-range BT.601 matrix, yet no
// black-level offset is removed from luma here - confirm that this is intended.
__device__ static void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
{
    // Luma is used as is; chroma becomes signed around zero.
    const float lumaValue = (float)yuvi[0];
    const float cbCentred = (float)((int)yuvi[1] - 512.0f);
    const float crCentred = (float)((int)yuvi[2] - 512.0f);

    // One matrix row per output channel.
    const float r = (lumaValue * constHueColorSpaceMat[0]) +
                    (cbCentred * constHueColorSpaceMat[1]) +
                    (crCentred * constHueColorSpaceMat[2]);

    const float g = (lumaValue * constHueColorSpaceMat[3]) +
                    (cbCentred * constHueColorSpaceMat[4]) +
                    (crCentred * constHueColorSpaceMat[5]);

    const float b = (lumaValue * constHueColorSpaceMat[6]) +
                    (cbCentred * constHueColorSpaceMat[7]) +
                    (crCentred * constHueColorSpaceMat[8]);

    *red   = r;
    *green = g;
    *blue  = b;
}
91
// Packs three color components given on a 0..1023 scale into one 32-bit pixel.
//
// Each component is clamped to [0, 1023], truncated to an integer and reduced
// to 8 bits by dropping its two low bits. The result holds blue in bits 0-7,
// green in bits 8-15 and red in bits 16-23; `alpha` is OR-ed in unchanged, so
// the caller must pass it already shifted into position.
__device__ static uint RGBA_pack_10bit(float red, float green, float blue, uint alpha)
{
    // Saturate to the 10-bit range before the float -> integer conversion.
    const float redClamped   = ::fmin(::fmax(red,   0.0f), 1023.f);
    const float greenClamped = ::fmin(::fmax(green, 0.0f), 1023.f);
    const float blueClamped  = ::fmin(::fmax(blue,  0.0f), 1023.f);

    // 10 bit -> 8 bit per component.
    const uint red8   = (uint)redClamped   >> 2;
    const uint green8 = (uint)greenClamped >> 2;
    const uint blue8  = (uint)blueClamped  >> 2;

    return blue8 | (green8 << 8) | (red8 << 16) | (uint)alpha;
}
109
// CUDA kernel for outputting the final ARGB output from NV12

// Number of bits per color component in the kernel's intermediate representation.
#define COLOR_COMPONENT_BIT_SIZE 10
// Mask selecting the low 10 bits, i.e. one such component.
#define COLOR_COMPONENT_MASK 0x3FF
114
// Converts an NV12 frame to packed 32-bit pixels (alpha = 0xff, then red, green,
// blue from the most to the least significant byte of each uint).
//
// Launch layout: 2D grid; every thread converts the two horizontally adjacent
// pixels (x, x + 1) of row y, so the grid must cover ceil(width / 2) threads in
// x and `height` threads in y. No shared memory is used.
//
// srcImage     - luma plane of `height` rows, immediately followed by the rows
//                of interleaved Cb/Cr bytes (Cb at even, Cr at odd offsets).
// nSourcePitch - distance in bytes between consecutive source rows (both planes).
// dstImage     - destination pixels, one uint per pixel.
// nDestPitch   - distance in bytes between consecutive destination rows.
// width/height - size of the picture in pixels.
//
// For an odd `width` the last thread of a row converts a single pixel only, so
// nothing is written past the end of a destination row.
__global__ void NV12_to_RGB(const uchar* srcImage, size_t nSourcePitch,
                            uint* dstImage, size_t nDestPitch,
                            uint width, uint height)
{
    // Two pixels per thread, hence the doubled step in x; x is always even.
    const uint x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
    const uint y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= width || y >= height)
        return;

    // False only for the last pixel pair of a row when width is odd.
    const bool hasSecondPixel = (x + 1 < width);

    // 8-bit samples are scaled up to COLOR_COMPONENT_BIT_SIZE bits before conversion.
    const uint upshift = COLOR_COMPONENT_BIT_SIZE - 8;

    const uchar* lumaRow = srcImage + y * nSourcePitch;

    // The chroma plane starts right after the luma plane and has one row per
    // two luma rows. Both pixels of the pair share the same Cb/Cr sample.
    const uint yChroma = y >> 1;
    const uchar* chromaRow = srcImage + nSourcePitch * height + yChroma * nSourcePitch;

    uint chromaCb = chromaRow[x];
    uint chromaCr = chromaRow[x + 1];

    // Odd scanlines average (rounding up) with the next chroma row, unless the
    // current one is already the last. Written as "+ 1 <" so that the unsigned
    // comparison cannot wrap around for very small heights.
    if ((y & 1) && (yChroma + 1 < (height >> 1)))
    {
        const uchar* nextChromaRow = chromaRow + nSourcePitch;

        chromaCb = (chromaCb + nextChromaRow[x    ] + 1) >> 1;
        chromaCr = (chromaCr + nextChromaRow[x + 1] + 1) >> 1;
    }

    uint yuvi[3];
    float red, green, blue;

    // nDestPitch is in bytes, the destination is addressed in 4-byte pixels.
    uint* dstRow = dstImage + y * (nDestPitch >> 2);
    const uint opaqueAlpha = ((uint)0xff << 24);

    // First pixel of the pair.
    yuvi[0] = (uint)lumaRow[x] << upshift;
    yuvi[1] = chromaCb << upshift;
    yuvi[2] = chromaCr << upshift;

    YUV2RGB(yuvi, &red, &green, &blue);
    dstRow[x] = RGBA_pack_10bit(red, green, blue, opaqueAlpha);

    // Second pixel: same chroma, its own luma.
    if (hasSecondPixel)
    {
        yuvi[0] = (uint)lumaRow[x + 1] << upshift;

        YUV2RGB(yuvi, &red, &green, &blue);
        dstRow[x + 1] = RGBA_pack_10bit(red, green, blue, opaqueAlpha);
    }
}
187 }
188
// Final decoding stage: converts a decoded NV12 frame into a 4-channel 8-bit image.
//
// decodedFrame - device buffer read by NV12_to_RGB; the kernel expects the
//                interleaved chroma rows to start at row `height`.
// _outFrame    - created here as a height x width CV_8UC4 GPU matrix and filled
//                by the kernel.
// width/height - size of the picture in pixels.
//
// The kernel is launched on the default stream and the function returns only
// after cudaDeviceSynchronize(); launch and execution errors are reported
// through CV_CUDEV_SAFE_CALL.
void videoDecPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height)
{
    _outFrame.create(height, width, CV_8UC4);
    GpuMat outFrame = _outFrame.getGpuMat();

    // Every thread converts two horizontally adjacent pixels, so a block of
    // 32 x 8 threads covers a 64 x 8 pixel tile.
    const dim3 threadsPerBlock(32, 8);
    const dim3 blocksPerGrid(divUp(width, 2 * threadsPerBlock.x),
                             divUp(height, threadsPerBlock.y));

    const uchar* const srcData = decodedFrame.ptr<uchar>();
    uint* const dstData = outFrame.ptr<uint>();

    NV12_to_RGB<<<blocksPerGrid, threadsPerBlock>>>(srcData, decodedFrame.step,
                                                    dstData, outFrame.step,
                                                    width, height);

    // First catch launch-configuration errors, then wait for the kernel so
    // that execution errors surface here as well.
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
206
207 #endif
208