1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #include "opencv2/opencv_modules.hpp"
44 
45 #ifndef HAVE_OPENCV_CUDEV
46 
47 #error "opencv_cudev is required"
48 
49 #else
50 
51 #include "opencv2/cudaarithm.hpp"
52 #include "opencv2/cudev.hpp"
53 #include "opencv2/core/private.cuda.hpp"
54 
55 using namespace cv;
56 using namespace cv::cuda;
57 using namespace cv::cudev;
58 
59 namespace
60 {
61     template <typename T, typename S, typename D>
reduceToRowImpl(const GpuMat & _src,GpuMat & _dst,int reduceOp,Stream & stream)62     void reduceToRowImpl(const GpuMat& _src, GpuMat& _dst, int reduceOp, Stream& stream)
63     {
64         const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
65         GpuMat_<D>& dst = (GpuMat_<D>&) _dst;
66 
67         switch (reduceOp)
68         {
69         case cv::REDUCE_SUM:
70             gridReduceToRow< Sum<S> >(src, dst, stream);
71             break;
72 
73         case cv::REDUCE_AVG:
74             gridReduceToRow< Avg<S> >(src, dst, stream);
75             break;
76 
77         case cv::REDUCE_MIN:
78             gridReduceToRow< Min<S> >(src, dst, stream);
79             break;
80 
81         case cv::REDUCE_MAX:
82             gridReduceToRow< Max<S> >(src, dst, stream);
83             break;
84         };
85     }
86 
87     template <typename T, typename S, typename D>
reduceToColumnImpl_(const GpuMat & _src,GpuMat & _dst,int reduceOp,Stream & stream)88     void reduceToColumnImpl_(const GpuMat& _src, GpuMat& _dst, int reduceOp, Stream& stream)
89     {
90         const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
91         GpuMat_<D>& dst = (GpuMat_<D>&) _dst;
92 
93         switch (reduceOp)
94         {
95         case cv::REDUCE_SUM:
96             gridReduceToColumn< Sum<S> >(src, dst, stream);
97             break;
98 
99         case cv::REDUCE_AVG:
100             gridReduceToColumn< Avg<S> >(src, dst, stream);
101             break;
102 
103         case cv::REDUCE_MIN:
104             gridReduceToColumn< Min<S> >(src, dst, stream);
105             break;
106 
107         case cv::REDUCE_MAX:
108             gridReduceToColumn< Max<S> >(src, dst, stream);
109             break;
110         };
111     }
112 
113     template <typename T, typename S, typename D>
reduceToColumnImpl(const GpuMat & src,GpuMat & dst,int reduceOp,Stream & stream)114     void reduceToColumnImpl(const GpuMat& src, GpuMat& dst, int reduceOp, Stream& stream)
115     {
116         typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int reduceOp, Stream& stream);
117         static const func_t funcs[4] =
118         {
119             reduceToColumnImpl_<T, S, D>,
120             reduceToColumnImpl_<typename MakeVec<T, 2>::type, typename MakeVec<S, 2>::type, typename MakeVec<D, 2>::type>,
121             reduceToColumnImpl_<typename MakeVec<T, 3>::type, typename MakeVec<S, 3>::type, typename MakeVec<D, 3>::type>,
122             reduceToColumnImpl_<typename MakeVec<T, 4>::type, typename MakeVec<S, 4>::type, typename MakeVec<D, 4>::type>
123         };
124 
125         funcs[src.channels() - 1](src, dst, reduceOp, stream);
126     }
127 }
128 
reduce(InputArray _src,OutputArray _dst,int dim,int reduceOp,int dtype,Stream & stream)129 void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream)
130 {
131     GpuMat src = getInputMat(_src, stream);
132 
133     CV_Assert( src.channels() <= 4 );
134     CV_Assert( dim == 0 || dim == 1 );
135     CV_Assert( reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG || reduceOp == REDUCE_MAX || reduceOp == REDUCE_MIN );
136 
137     if (dtype < 0)
138         dtype = src.depth();
139 
140     GpuMat dst = getOutputMat(_dst, 1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()), stream);
141 
142     if (dim == 0)
143     {
144         typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, int reduceOp, Stream& stream);
145         static const func_t funcs[7][7] =
146         {
147             {
148                 reduceToRowImpl<uchar, int, uchar>,
149                 0 /*reduceToRowImpl<uchar, int, schar>*/,
150                 0 /*reduceToRowImpl<uchar, int, ushort>*/,
151                 0 /*reduceToRowImpl<uchar, int, short>*/,
152                 reduceToRowImpl<uchar, int, int>,
153                 reduceToRowImpl<uchar, float, float>,
154                 reduceToRowImpl<uchar, double, double>
155             },
156             {
157                 0 /*reduceToRowImpl<schar, int, uchar>*/,
158                 0 /*reduceToRowImpl<schar, int, schar>*/,
159                 0 /*reduceToRowImpl<schar, int, ushort>*/,
160                 0 /*reduceToRowImpl<schar, int, short>*/,
161                 0 /*reduceToRowImpl<schar, int, int>*/,
162                 0 /*reduceToRowImpl<schar, float, float>*/,
163                 0 /*reduceToRowImpl<schar, double, double>*/
164             },
165             {
166                 0 /*reduceToRowImpl<ushort, int, uchar>*/,
167                 0 /*reduceToRowImpl<ushort, int, schar>*/,
168                 reduceToRowImpl<ushort, int, ushort>,
169                 0 /*reduceToRowImpl<ushort, int, short>*/,
170                 reduceToRowImpl<ushort, int, int>,
171                 reduceToRowImpl<ushort, float, float>,
172                 reduceToRowImpl<ushort, double, double>
173             },
174             {
175                 0 /*reduceToRowImpl<short, int, uchar>*/,
176                 0 /*reduceToRowImpl<short, int, schar>*/,
177                 0 /*reduceToRowImpl<short, int, ushort>*/,
178                 reduceToRowImpl<short, int, short>,
179                 reduceToRowImpl<short, int, int>,
180                 reduceToRowImpl<short, float, float>,
181                 reduceToRowImpl<short, double, double>
182             },
183             {
184                 0 /*reduceToRowImpl<int, int, uchar>*/,
185                 0 /*reduceToRowImpl<int, int, schar>*/,
186                 0 /*reduceToRowImpl<int, int, ushort>*/,
187                 0 /*reduceToRowImpl<int, int, short>*/,
188                 reduceToRowImpl<int, int, int>,
189                 reduceToRowImpl<int, float, float>,
190                 reduceToRowImpl<int, double, double>
191             },
192             {
193                 0 /*reduceToRowImpl<float, float, uchar>*/,
194                 0 /*reduceToRowImpl<float, float, schar>*/,
195                 0 /*reduceToRowImpl<float, float, ushort>*/,
196                 0 /*reduceToRowImpl<float, float, short>*/,
197                 0 /*reduceToRowImpl<float, float, int>*/,
198                 reduceToRowImpl<float, float, float>,
199                 reduceToRowImpl<float, double, double>
200             },
201             {
202                 0 /*reduceToRowImpl<double, double, uchar>*/,
203                 0 /*reduceToRowImpl<double, double, schar>*/,
204                 0 /*reduceToRowImpl<double, double, ushort>*/,
205                 0 /*reduceToRowImpl<double, double, short>*/,
206                 0 /*reduceToRowImpl<double, double, int>*/,
207                 0 /*reduceToRowImpl<double, double, float>*/,
208                 reduceToRowImpl<double, double, double>
209             }
210         };
211 
212         const func_t func = funcs[src.depth()][dst.depth()];
213 
214         if (!func)
215             CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of input and output array formats");
216 
217         GpuMat dst_cont = dst.reshape(1);
218         func(src.reshape(1), dst_cont, reduceOp, stream);
219     }
220     else
221     {
222         typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, int reduceOp, Stream& stream);
223         static const func_t funcs[7][7] =
224         {
225             {
226                 reduceToColumnImpl<uchar, int, uchar>,
227                 0 /*reduceToColumnImpl<uchar, int, schar>*/,
228                 0 /*reduceToColumnImpl<uchar, int, ushort>*/,
229                 0 /*reduceToColumnImpl<uchar, int, short>*/,
230                 reduceToColumnImpl<uchar, int, int>,
231                 reduceToColumnImpl<uchar, float, float>,
232                 reduceToColumnImpl<uchar, double, double>
233             },
234             {
235                 0 /*reduceToColumnImpl<schar, int, uchar>*/,
236                 0 /*reduceToColumnImpl<schar, int, schar>*/,
237                 0 /*reduceToColumnImpl<schar, int, ushort>*/,
238                 0 /*reduceToColumnImpl<schar, int, short>*/,
239                 0 /*reduceToColumnImpl<schar, int, int>*/,
240                 0 /*reduceToColumnImpl<schar, float, float>*/,
241                 0 /*reduceToColumnImpl<schar, double, double>*/
242             },
243             {
244                 0 /*reduceToColumnImpl<ushort, int, uchar>*/,
245                 0 /*reduceToColumnImpl<ushort, int, schar>*/,
246                 reduceToColumnImpl<ushort, int, ushort>,
247                 0 /*reduceToColumnImpl<ushort, int, short>*/,
248                 reduceToColumnImpl<ushort, int, int>,
249                 reduceToColumnImpl<ushort, float, float>,
250                 reduceToColumnImpl<ushort, double, double>
251             },
252             {
253                 0 /*reduceToColumnImpl<short, int, uchar>*/,
254                 0 /*reduceToColumnImpl<short, int, schar>*/,
255                 0 /*reduceToColumnImpl<short, int, ushort>*/,
256                 reduceToColumnImpl<short, int, short>,
257                 reduceToColumnImpl<short, int, int>,
258                 reduceToColumnImpl<short, float, float>,
259                 reduceToColumnImpl<short, double, double>
260             },
261             {
262                 0 /*reduceToColumnImpl<int, int, uchar>*/,
263                 0 /*reduceToColumnImpl<int, int, schar>*/,
264                 0 /*reduceToColumnImpl<int, int, ushort>*/,
265                 0 /*reduceToColumnImpl<int, int, short>*/,
266                 reduceToColumnImpl<int, int, int>,
267                 reduceToColumnImpl<int, float, float>,
268                 reduceToColumnImpl<int, double, double>
269             },
270             {
271                 0 /*reduceToColumnImpl<float, float, uchar>*/,
272                 0 /*reduceToColumnImpl<float, float, schar>*/,
273                 0 /*reduceToColumnImpl<float, float, ushort>*/,
274                 0 /*reduceToColumnImpl<float, float, short>*/,
275                 0 /*reduceToColumnImpl<float, float, int>*/,
276                 reduceToColumnImpl<float, float, float>,
277                 reduceToColumnImpl<float, double, double>
278             },
279             {
280                 0 /*reduceToColumnImpl<double, double, uchar>*/,
281                 0 /*reduceToColumnImpl<double, double, schar>*/,
282                 0 /*reduceToColumnImpl<double, double, ushort>*/,
283                 0 /*reduceToColumnImpl<double, double, short>*/,
284                 0 /*reduceToColumnImpl<double, double, int>*/,
285                 0 /*reduceToColumnImpl<double, double, float>*/,
286                 reduceToColumnImpl<double, double, double>
287             }
288         };
289 
290         const func_t func = funcs[src.depth()][dst.depth()];
291 
292         if (!func)
293             CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of input and output array formats");
294 
295         func(src, dst, reduceOp, stream);
296     }
297 
298     syncOutput(dst, _dst, stream);
299 }
300 
301 #endif
302