1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "opencv2/opencv_modules.hpp"
44
45 #ifndef HAVE_OPENCV_CUDEV
46
47 #error "opencv_cudev is required"
48
49 #else
50
51 #include "opencv2/cudaarithm.hpp"
52 #include "opencv2/cudev.hpp"
53 #include "opencv2/core/private.cuda.hpp"
54
55 using namespace cv;
56 using namespace cv::cuda;
57 using namespace cv::cudev;
58
59 namespace
60 {
61 template <typename T, typename S, typename D>
reduceToRowImpl(const GpuMat & _src,GpuMat & _dst,int reduceOp,Stream & stream)62 void reduceToRowImpl(const GpuMat& _src, GpuMat& _dst, int reduceOp, Stream& stream)
63 {
64 const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
65 GpuMat_<D>& dst = (GpuMat_<D>&) _dst;
66
67 switch (reduceOp)
68 {
69 case cv::REDUCE_SUM:
70 gridReduceToRow< Sum<S> >(src, dst, stream);
71 break;
72
73 case cv::REDUCE_AVG:
74 gridReduceToRow< Avg<S> >(src, dst, stream);
75 break;
76
77 case cv::REDUCE_MIN:
78 gridReduceToRow< Min<S> >(src, dst, stream);
79 break;
80
81 case cv::REDUCE_MAX:
82 gridReduceToRow< Max<S> >(src, dst, stream);
83 break;
84 };
85 }
86
87 template <typename T, typename S, typename D>
reduceToColumnImpl_(const GpuMat & _src,GpuMat & _dst,int reduceOp,Stream & stream)88 void reduceToColumnImpl_(const GpuMat& _src, GpuMat& _dst, int reduceOp, Stream& stream)
89 {
90 const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
91 GpuMat_<D>& dst = (GpuMat_<D>&) _dst;
92
93 switch (reduceOp)
94 {
95 case cv::REDUCE_SUM:
96 gridReduceToColumn< Sum<S> >(src, dst, stream);
97 break;
98
99 case cv::REDUCE_AVG:
100 gridReduceToColumn< Avg<S> >(src, dst, stream);
101 break;
102
103 case cv::REDUCE_MIN:
104 gridReduceToColumn< Min<S> >(src, dst, stream);
105 break;
106
107 case cv::REDUCE_MAX:
108 gridReduceToColumn< Max<S> >(src, dst, stream);
109 break;
110 };
111 }
112
113 template <typename T, typename S, typename D>
reduceToColumnImpl(const GpuMat & src,GpuMat & dst,int reduceOp,Stream & stream)114 void reduceToColumnImpl(const GpuMat& src, GpuMat& dst, int reduceOp, Stream& stream)
115 {
116 typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int reduceOp, Stream& stream);
117 static const func_t funcs[4] =
118 {
119 reduceToColumnImpl_<T, S, D>,
120 reduceToColumnImpl_<typename MakeVec<T, 2>::type, typename MakeVec<S, 2>::type, typename MakeVec<D, 2>::type>,
121 reduceToColumnImpl_<typename MakeVec<T, 3>::type, typename MakeVec<S, 3>::type, typename MakeVec<D, 3>::type>,
122 reduceToColumnImpl_<typename MakeVec<T, 4>::type, typename MakeVec<S, 4>::type, typename MakeVec<D, 4>::type>
123 };
124
125 funcs[src.channels() - 1](src, dst, reduceOp, stream);
126 }
127 }
128
reduce(InputArray _src,OutputArray _dst,int dim,int reduceOp,int dtype,Stream & stream)129 void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream)
130 {
131 GpuMat src = getInputMat(_src, stream);
132
133 CV_Assert( src.channels() <= 4 );
134 CV_Assert( dim == 0 || dim == 1 );
135 CV_Assert( reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG || reduceOp == REDUCE_MAX || reduceOp == REDUCE_MIN );
136
137 if (dtype < 0)
138 dtype = src.depth();
139
140 GpuMat dst = getOutputMat(_dst, 1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()), stream);
141
142 if (dim == 0)
143 {
144 typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, int reduceOp, Stream& stream);
145 static const func_t funcs[7][7] =
146 {
147 {
148 reduceToRowImpl<uchar, int, uchar>,
149 0 /*reduceToRowImpl<uchar, int, schar>*/,
150 0 /*reduceToRowImpl<uchar, int, ushort>*/,
151 0 /*reduceToRowImpl<uchar, int, short>*/,
152 reduceToRowImpl<uchar, int, int>,
153 reduceToRowImpl<uchar, float, float>,
154 reduceToRowImpl<uchar, double, double>
155 },
156 {
157 0 /*reduceToRowImpl<schar, int, uchar>*/,
158 0 /*reduceToRowImpl<schar, int, schar>*/,
159 0 /*reduceToRowImpl<schar, int, ushort>*/,
160 0 /*reduceToRowImpl<schar, int, short>*/,
161 0 /*reduceToRowImpl<schar, int, int>*/,
162 0 /*reduceToRowImpl<schar, float, float>*/,
163 0 /*reduceToRowImpl<schar, double, double>*/
164 },
165 {
166 0 /*reduceToRowImpl<ushort, int, uchar>*/,
167 0 /*reduceToRowImpl<ushort, int, schar>*/,
168 reduceToRowImpl<ushort, int, ushort>,
169 0 /*reduceToRowImpl<ushort, int, short>*/,
170 reduceToRowImpl<ushort, int, int>,
171 reduceToRowImpl<ushort, float, float>,
172 reduceToRowImpl<ushort, double, double>
173 },
174 {
175 0 /*reduceToRowImpl<short, int, uchar>*/,
176 0 /*reduceToRowImpl<short, int, schar>*/,
177 0 /*reduceToRowImpl<short, int, ushort>*/,
178 reduceToRowImpl<short, int, short>,
179 reduceToRowImpl<short, int, int>,
180 reduceToRowImpl<short, float, float>,
181 reduceToRowImpl<short, double, double>
182 },
183 {
184 0 /*reduceToRowImpl<int, int, uchar>*/,
185 0 /*reduceToRowImpl<int, int, schar>*/,
186 0 /*reduceToRowImpl<int, int, ushort>*/,
187 0 /*reduceToRowImpl<int, int, short>*/,
188 reduceToRowImpl<int, int, int>,
189 reduceToRowImpl<int, float, float>,
190 reduceToRowImpl<int, double, double>
191 },
192 {
193 0 /*reduceToRowImpl<float, float, uchar>*/,
194 0 /*reduceToRowImpl<float, float, schar>*/,
195 0 /*reduceToRowImpl<float, float, ushort>*/,
196 0 /*reduceToRowImpl<float, float, short>*/,
197 0 /*reduceToRowImpl<float, float, int>*/,
198 reduceToRowImpl<float, float, float>,
199 reduceToRowImpl<float, double, double>
200 },
201 {
202 0 /*reduceToRowImpl<double, double, uchar>*/,
203 0 /*reduceToRowImpl<double, double, schar>*/,
204 0 /*reduceToRowImpl<double, double, ushort>*/,
205 0 /*reduceToRowImpl<double, double, short>*/,
206 0 /*reduceToRowImpl<double, double, int>*/,
207 0 /*reduceToRowImpl<double, double, float>*/,
208 reduceToRowImpl<double, double, double>
209 }
210 };
211
212 const func_t func = funcs[src.depth()][dst.depth()];
213
214 if (!func)
215 CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of input and output array formats");
216
217 GpuMat dst_cont = dst.reshape(1);
218 func(src.reshape(1), dst_cont, reduceOp, stream);
219 }
220 else
221 {
222 typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, int reduceOp, Stream& stream);
223 static const func_t funcs[7][7] =
224 {
225 {
226 reduceToColumnImpl<uchar, int, uchar>,
227 0 /*reduceToColumnImpl<uchar, int, schar>*/,
228 0 /*reduceToColumnImpl<uchar, int, ushort>*/,
229 0 /*reduceToColumnImpl<uchar, int, short>*/,
230 reduceToColumnImpl<uchar, int, int>,
231 reduceToColumnImpl<uchar, float, float>,
232 reduceToColumnImpl<uchar, double, double>
233 },
234 {
235 0 /*reduceToColumnImpl<schar, int, uchar>*/,
236 0 /*reduceToColumnImpl<schar, int, schar>*/,
237 0 /*reduceToColumnImpl<schar, int, ushort>*/,
238 0 /*reduceToColumnImpl<schar, int, short>*/,
239 0 /*reduceToColumnImpl<schar, int, int>*/,
240 0 /*reduceToColumnImpl<schar, float, float>*/,
241 0 /*reduceToColumnImpl<schar, double, double>*/
242 },
243 {
244 0 /*reduceToColumnImpl<ushort, int, uchar>*/,
245 0 /*reduceToColumnImpl<ushort, int, schar>*/,
246 reduceToColumnImpl<ushort, int, ushort>,
247 0 /*reduceToColumnImpl<ushort, int, short>*/,
248 reduceToColumnImpl<ushort, int, int>,
249 reduceToColumnImpl<ushort, float, float>,
250 reduceToColumnImpl<ushort, double, double>
251 },
252 {
253 0 /*reduceToColumnImpl<short, int, uchar>*/,
254 0 /*reduceToColumnImpl<short, int, schar>*/,
255 0 /*reduceToColumnImpl<short, int, ushort>*/,
256 reduceToColumnImpl<short, int, short>,
257 reduceToColumnImpl<short, int, int>,
258 reduceToColumnImpl<short, float, float>,
259 reduceToColumnImpl<short, double, double>
260 },
261 {
262 0 /*reduceToColumnImpl<int, int, uchar>*/,
263 0 /*reduceToColumnImpl<int, int, schar>*/,
264 0 /*reduceToColumnImpl<int, int, ushort>*/,
265 0 /*reduceToColumnImpl<int, int, short>*/,
266 reduceToColumnImpl<int, int, int>,
267 reduceToColumnImpl<int, float, float>,
268 reduceToColumnImpl<int, double, double>
269 },
270 {
271 0 /*reduceToColumnImpl<float, float, uchar>*/,
272 0 /*reduceToColumnImpl<float, float, schar>*/,
273 0 /*reduceToColumnImpl<float, float, ushort>*/,
274 0 /*reduceToColumnImpl<float, float, short>*/,
275 0 /*reduceToColumnImpl<float, float, int>*/,
276 reduceToColumnImpl<float, float, float>,
277 reduceToColumnImpl<float, double, double>
278 },
279 {
280 0 /*reduceToColumnImpl<double, double, uchar>*/,
281 0 /*reduceToColumnImpl<double, double, schar>*/,
282 0 /*reduceToColumnImpl<double, double, ushort>*/,
283 0 /*reduceToColumnImpl<double, double, short>*/,
284 0 /*reduceToColumnImpl<double, double, int>*/,
285 0 /*reduceToColumnImpl<double, double, float>*/,
286 reduceToColumnImpl<double, double, double>
287 }
288 };
289
290 const func_t func = funcs[src.depth()][dst.depth()];
291
292 if (!func)
293 CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of input and output array formats");
294
295 func(src, dst, reduceOp, stream);
296 }
297
298 syncOutput(dst, _dst, stream);
299 }
300
301 #endif
302