1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #include "test_precomp.hpp"
44 
45 #ifdef HAVE_CUDA
46 
47 using namespace cvtest;
48 
49 //////////////////////////////////////////////////////////////////////////////
50 // GEMM
51 
52 #ifdef HAVE_CUBLAS
53 
54 CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
55 #define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
56 
// Fixture for the cv::cuda::gemm tests. Parameters, in order: device,
// matrix size, matrix type, GEMM flags, and the UseRoi switch that the
// test forwards to loadMat/createMat.
PARAM_TEST_CASE(GEMM, cv::cuda::DeviceInfo, cv::Size, MatType, GemmFlags, UseRoi)
{
    cv::cuda::DeviceInfo devInfo;
    cv::Size size;
    int type;
    int flags;
    bool useRoi;

    virtual void SetUp()
    {
        // Make the device under test current first.
        devInfo = GET_PARAM(0);
        cv::cuda::setDevice(devInfo.deviceID());

        // Unpack the remaining test parameters.
        size   = GET_PARAM(1);
        type   = GET_PARAM(2);
        flags  = GET_PARAM(3);
        useRoi = GET_PARAM(4);
    }
};
76 
// Compares cv::cuda::gemm with cv::gemm on random operands.
//
// Two configurations are handled specially: 64F data on a device without
// NATIVE_DOUBLE, and CV_64FC2 with any non-zero flags. For those the call is
// made with a default-constructed destination and, if it throws, the error
// code is checked; if it does not throw, nothing else is verified.
CUDA_TEST_P(GEMM, Accuracy)
{
    // Random draws are kept in this exact order so the sequence is unchanged.
    cv::Mat src1 = randomMat(size, type, -10.0, 10.0);
    cv::Mat src2 = randomMat(size, type, -10.0, 10.0);
    cv::Mat src3 = randomMat(size, type, -10.0, 10.0);
    double alpha = randomDouble(-10.0, 10.0);
    double beta = randomDouble(-10.0, 10.0);

    const bool isDoubleDepth = CV_MAT_DEPTH(type) == CV_64F;

    // Guard 1: double precision requested on a device lacking native support.
    if (isDoubleDepth && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
    {
        try
        {
            cv::cuda::GpuMat dst;
            cv::cuda::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
        }
        return;
    }

    // Guard 2: complex double input combined with any GEMM flag.
    if (type == CV_64FC2 && flags != 0)
    {
        try
        {
            cv::cuda::GpuMat dst;
            cv::cuda::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
        }
        return;
    }

    // Regular path: host reference first, then the device result.
    cv::Mat dst_gold;
    cv::gemm(src1, src2, alpha, src3, beta, dst_gold, flags);

    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
    cv::cuda::gemm(loadMat(src1, useRoi), loadMat(src2, useRoi), alpha, loadMat(src3, useRoi), beta, dst, flags);

    // Single precision gets a much looser tolerance than double precision.
    EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1e-10);
}
120 
121 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, GEMM, testing::Combine(
122     ALL_DEVICES,
123     DIFFERENT_SIZES,
124     testing::Values(MatType(CV_32FC1), MatType(CV_32FC2), MatType(CV_64FC1), MatType(CV_64FC2)),
125     ALL_GEMM_FLAGS,
126     WHOLE_SUBMAT));
127 
128 ////////////////////////////////////////////////////////////////////////////
129 // MulSpectrums
130 
131 CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
132 
// Fixture for the mulSpectrums tests. Parameters, in order: device, matrix
// size, DFT flag. SetUp also prepares two random CV_32FC2 operands.
PARAM_TEST_CASE(MulSpectrums, cv::cuda::DeviceInfo, cv::Size, DftFlags)
{
    cv::cuda::DeviceInfo devInfo;
    cv::Size size;
    int flag;

    // Host-side operands shared by the tests of this fixture.
    cv::Mat a, b;

    virtual void SetUp()
    {
        // Make the device under test current first.
        devInfo = GET_PARAM(0);
        cv::cuda::setDevice(devInfo.deviceID());

        size = GET_PARAM(1);
        flag = GET_PARAM(2);

        // Operand 'a' is drawn before 'b'; the order fixes the random sequence.
        a = randomMat(size, CV_32FC2);
        b = randomMat(size, CV_32FC2);
    }
};
153 
// cv::cuda::mulSpectrums must agree with cv::mulSpectrums (no conjugation)
// on the fixture operands, within an absolute tolerance of 1e-2.
CUDA_TEST_P(MulSpectrums, Simple)
{
    // Host reference.
    cv::Mat c_gold;
    cv::mulSpectrums(a, b, c_gold, flag, false);

    // Device result, computed from uploaded copies of the same operands.
    const cv::cuda::GpuMat d_a = loadMat(a);
    const cv::cuda::GpuMat d_b = loadMat(b);

    cv::cuda::GpuMat c;
    cv::cuda::mulSpectrums(d_a, d_b, c, flag, false);

    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
}
164 
// cv::cuda::mulAndScaleSpectrums must agree with cv::mulSpectrums followed by
// a scaling of the result by 1 / (number of elements), within 1e-2.
CUDA_TEST_P(MulSpectrums, Scaled)
{
    float scale = 1.f / size.area();

    // Host reference: multiply, then apply the scale through convertTo.
    cv::Mat c_gold;
    cv::mulSpectrums(a, b, c_gold, flag, false);
    c_gold.convertTo(c_gold, c_gold.type(), scale);

    // Device result: multiplication and scaling in a single call.
    const cv::cuda::GpuMat d_a = loadMat(a);
    const cv::cuda::GpuMat d_b = loadMat(b);

    cv::cuda::GpuMat c;
    cv::cuda::mulAndScaleSpectrums(d_a, d_b, c, flag, scale, false);

    EXPECT_MAT_NEAR(c_gold, c, 1e-2);
}
178 
179 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, MulSpectrums, testing::Combine(
180     ALL_DEVICES,
181     DIFFERENT_SIZES,
182     testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
183 
184 ////////////////////////////////////////////////////////////////////////////
185 // Dft
186 
187 struct Dft : testing::TestWithParam<cv::cuda::DeviceInfo>
188 {
189     cv::cuda::DeviceInfo devInfo;
190 
SetUpDft191     virtual void SetUp()
192     {
193         devInfo = GetParam();
194 
195         cv::cuda::setDevice(devInfo.deviceID());
196     }
197 };
198 
199 namespace
200 {
testC2C(const std::string & hint,int cols,int rows,int flags,bool inplace)201     void testC2C(const std::string& hint, int cols, int rows, int flags, bool inplace)
202     {
203         SCOPED_TRACE(hint);
204 
205         cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);
206 
207         cv::Mat b_gold;
208         cv::dft(a, b_gold, flags);
209 
210         cv::cuda::GpuMat d_b;
211         cv::cuda::GpuMat d_b_data;
212         if (inplace)
213         {
214             d_b_data.create(1, a.size().area(), CV_32FC2);
215             d_b = cv::cuda::GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
216         }
217         cv::cuda::dft(loadMat(a), d_b, cv::Size(cols, rows), flags);
218 
219         EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
220         ASSERT_EQ(CV_32F, d_b.depth());
221         ASSERT_EQ(2, d_b.channels());
222         EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
223     }
224 }
225 
// Complex-to-complex DFT checks on randomly sized matrices, executed once with
// a destination allocated by cv::cuda::dft and once with a caller-provided one.
//
// In the "no flags C R" labels the first digit is the increment applied to
// cols and the second the increment applied to rows, the same convention used
// by the "sanity C R" labels of the R2CThenC2R test. Previously "no flags 1 0"
// repeated the (cols, rows + 1) shape and "no flags 1 1" ran (cols + 1, rows),
// so the (cols + 1, rows + 1) shape was never exercised.
CUDA_TEST_P(Dft, C2C)
{
    int cols = randomInt(2, 100);
    int rows = randomInt(2, 100);

    for (int i = 0; i < 2; ++i)
    {
        // First pass: library-allocated output; second pass: in-place buffer.
        bool inplace = i != 0;

        // Both parities of each dimension.
        testC2C("no flags", cols, rows, 0, inplace);
        testC2C("no flags 0 1", cols, rows + 1, 0, inplace);
        testC2C("no flags 1 0", cols + 1, rows, 0, inplace);
        testC2C("no flags 1 1", cols + 1, rows + 1, 0, inplace);

        // Flag variants on the base shape.
        testC2C("DFT_INVERSE", cols, rows, cv::DFT_INVERSE, inplace);
        testC2C("DFT_ROWS", cols, rows, cv::DFT_ROWS, inplace);

        // Degenerate one-dimensional shapes.
        testC2C("single col", 1, rows, 0, inplace);
        testC2C("single row", cols, 1, 0, inplace);
        testC2C("single col inversed", 1, rows, cv::DFT_INVERSE, inplace);
        testC2C("single row inversed", cols, 1, cv::DFT_INVERSE, inplace);
        testC2C("single row DFT_ROWS", cols, 1, cv::DFT_ROWS, inplace);

        // Smallest two-element shapes.
        testC2C("size 1 2", 1, 2, 0, inplace);
        testC2C("size 2 1", 2, 1, 0, inplace);
    }
}
250 
251 namespace
252 {
testR2CThenC2R(const std::string & hint,int cols,int rows,bool inplace)253     void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)
254     {
255         SCOPED_TRACE(hint);
256 
257         cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC1, 0.0, 10.0);
258 
259         cv::cuda::GpuMat d_b, d_c;
260         cv::cuda::GpuMat d_b_data, d_c_data;
261         if (inplace)
262         {
263             if (a.cols == 1)
264             {
265                 d_b_data.create(1, (a.rows / 2 + 1) * a.cols, CV_32FC2);
266                 d_b = cv::cuda::GpuMat(a.rows / 2 + 1, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
267             }
268             else
269             {
270                 d_b_data.create(1, a.rows * (a.cols / 2 + 1), CV_32FC2);
271                 d_b = cv::cuda::GpuMat(a.rows, a.cols / 2 + 1, CV_32FC2, d_b_data.ptr(), (a.cols / 2 + 1) * d_b_data.elemSize());
272             }
273             d_c_data.create(1, a.size().area(), CV_32F);
274             d_c = cv::cuda::GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
275         }
276 
277         cv::cuda::dft(loadMat(a), d_b, cv::Size(cols, rows), 0);
278         cv::cuda::dft(d_b, d_c, cv::Size(cols, rows), cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
279 
280         EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
281         EXPECT_TRUE(!inplace || d_c.ptr() == d_c_data.ptr());
282         ASSERT_EQ(CV_32F, d_c.depth());
283         ASSERT_EQ(1, d_c.channels());
284 
285         cv::Mat c(d_c);
286         EXPECT_MAT_NEAR(a, c, rows * cols * 1e-5);
287     }
288 }
289 
// Real-to-complex / complex-to-real round trips on randomly sized matrices.
// The first pass lets cv::cuda::dft allocate its outputs, the second uses
// caller-provided buffers. The single-column shapes are only run in the
// first pass.
CUDA_TEST_P(Dft, R2CThenC2R)
{
    int cols = randomInt(2, 100);
    int rows = randomInt(2, 100);

    for (int pass = 0; pass < 2; ++pass)
    {
        const bool inplace = pass == 1;

        // In the labels the first digit is the cols increment, the second the
        // rows increment.
        testR2CThenC2R("sanity", cols, rows, inplace);
        testR2CThenC2R("sanity 0 1", cols, rows + 1, inplace);
        testR2CThenC2R("sanity 1 0", cols + 1, rows, inplace);
        testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, inplace);

        if (!inplace)
        {
            testR2CThenC2R("single col", 1, rows, inplace);
            testR2CThenC2R("single col 1", 1, rows + 1, inplace);
        }

        testR2CThenC2R("single row", cols, 1, inplace);
        testR2CThenC2R("single row 1", cols + 1, 1, inplace);
    }
}
311 
312 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Dft, ALL_DEVICES);
313 
314 ////////////////////////////////////////////////////////
315 // Convolve
316 
317 namespace
318 {
convolveDFT(const cv::Mat & A,const cv::Mat & B,cv::Mat & C,bool ccorr=false)319     void convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false)
320     {
321         // reallocate the output array if needed
322         C.create(std::abs(A.rows - B.rows) + 1, std::abs(A.cols - B.cols) + 1, A.type());
323         cv::Size dftSize;
324 
325         // compute the size of DFT transform
326         dftSize.width = cv::getOptimalDFTSize(A.cols + B.cols - 1);
327         dftSize.height = cv::getOptimalDFTSize(A.rows + B.rows - 1);
328 
329         // allocate temporary buffers and initialize them with 0s
330         cv::Mat tempA(dftSize, A.type(), cv::Scalar::all(0));
331         cv::Mat tempB(dftSize, B.type(), cv::Scalar::all(0));
332 
333         // copy A and B to the top-left corners of tempA and tempB, respectively
334         cv::Mat roiA(tempA, cv::Rect(0, 0, A.cols, A.rows));
335         A.copyTo(roiA);
336         cv::Mat roiB(tempB, cv::Rect(0, 0, B.cols, B.rows));
337         B.copyTo(roiB);
338 
339         // now transform the padded A & B in-place;
340         // use "nonzeroRows" hint for faster processing
341         cv::dft(tempA, tempA, 0, A.rows);
342         cv::dft(tempB, tempB, 0, B.rows);
343 
344         // multiply the spectrums;
345         // the function handles packed spectrum representations well
346         cv::mulSpectrums(tempA, tempB, tempA, 0, ccorr);
347 
348         // transform the product back from the frequency domain.
349         // Even though all the result rows will be non-zero,
350         // you need only the first C.rows of them, and thus you
351         // pass nonzeroRows == C.rows
352         cv::dft(tempA, tempA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows);
353 
354         // now copy the result back to C.
355         tempA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C);
356     }
357 
358     IMPLEMENT_PARAM_CLASS(KSize, int)
359     IMPLEMENT_PARAM_CLASS(Ccorr, bool)
360 }
361 
// Fixture for the convolution tests. Parameters, in order: device, source
// size, kernel side length, and the ccorr switch passed to convolve().
PARAM_TEST_CASE(Convolve, cv::cuda::DeviceInfo, cv::Size, KSize, Ccorr)
{
    cv::cuda::DeviceInfo devInfo;
    cv::Size size;
    int ksize;
    bool ccorr;

    virtual void SetUp()
    {
        // Make the device under test current first.
        devInfo = GET_PARAM(0);
        cv::cuda::setDevice(devInfo.deviceID());

        // Unpack the remaining test parameters.
        size  = GET_PARAM(1);
        ksize = GET_PARAM(2);
        ccorr = GET_PARAM(3);
    }
};
379 
// The cv::cuda::Convolution result must agree with the host convolveDFT
// reference, within 1e-1, for a random image and a random square kernel.
CUDA_TEST_P(Convolve, Accuracy)
{
    // Inputs; the image is drawn before the kernel to keep the random sequence.
    cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0);
    cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);

    // Host reference.
    cv::Mat dst_gold;
    convolveDFT(src, kernel, dst_gold, ccorr);

    // Device result.
    cv::Ptr<cv::cuda::Convolution> conv = cv::cuda::createConvolution();

    const cv::cuda::GpuMat d_src = loadMat(src);
    const cv::cuda::GpuMat d_kernel = loadMat(kernel);

    cv::cuda::GpuMat dst;
    conv->convolve(d_src, d_kernel, dst, ccorr);

    EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);
}
395 
396 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Convolve, testing::Combine(
397     ALL_DEVICES,
398     DIFFERENT_SIZES,
399     testing::Values(KSize(3), KSize(7), KSize(11), KSize(17), KSize(19), KSize(23), KSize(45)),
400     testing::Values(Ccorr(false), Ccorr(true))));
401 
402 #endif // HAVE_CUBLAS
403 
404 #endif // HAVE_CUDA
405