1 #include <iomanip>
2 #include <stdexcept>
3 #include <string>
4 #include "performance.h"
5 #include "opencv2/core/cuda.hpp"
6
7 using namespace std;
8 using namespace cv;
9 using namespace cv::cuda;
10
run()11 void TestSystem::run()
12 {
13 if (is_list_mode_)
14 {
15 for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
16 cout << (*it)->name() << endl;
17
18 return;
19 }
20
21 // Run test initializers
22 for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)
23 {
24 if ((*it)->name().find(test_filter_, 0) != string::npos)
25 (*it)->run();
26 }
27
28 printHeading();
29
30 // Run tests
31 for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
32 {
33 try
34 {
35 if ((*it)->name().find(test_filter_, 0) != string::npos)
36 {
37 cout << endl << (*it)->name() << ":\n";
38 (*it)->run();
39 finishCurrentSubtest();
40 }
41 }
42 catch (const Exception&)
43 {
44 // Message is printed via callback
45 resetCurrentSubtest();
46 }
47 catch (const runtime_error& e)
48 {
49 printError(e.what());
50 resetCurrentSubtest();
51 }
52 }
53
54 printSummary();
55 }
56
57
finishCurrentSubtest()58 void TestSystem::finishCurrentSubtest()
59 {
60 if (cur_subtest_is_empty_)
61 // There is no need to print subtest statistics
62 return;
63
64 double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
65 double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
66
67 double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
68 speedup_total_ += speedup;
69
70 printMetrics(cpu_time, gpu_time, speedup);
71
72 num_subtests_called_++;
73 resetCurrentSubtest();
74 }
75
76
meanTime(const vector<int64> & samples)77 double TestSystem::meanTime(const vector<int64> &samples)
78 {
79 double sum = accumulate(samples.begin(), samples.end(), 0.);
80 if (samples.size() > 1)
81 return (sum - samples[0]) / (samples.size() - 1);
82 return sum;
83 }
84
85
printHeading()86 void TestSystem::printHeading()
87 {
88 cout << endl;
89 cout << setiosflags(ios_base::left);
90 cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
91 << setw(14) << "SPEEDUP"
92 << "DESCRIPTION\n";
93 cout << resetiosflags(ios_base::left);
94 }
95
96
printSummary()97 void TestSystem::printSummary()
98 {
99 cout << setiosflags(ios_base::fixed);
100 cout << "\naverage GPU speedup: x"
101 << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
102 << endl;
103 cout << resetiosflags(ios_base::fixed);
104 }
105
106
printMetrics(double cpu_time,double gpu_time,double speedup)107 void TestSystem::printMetrics(double cpu_time, double gpu_time, double speedup)
108 {
109 cout << TAB << setiosflags(ios_base::left);
110 stringstream stream;
111
112 stream << cpu_time;
113 cout << setw(10) << stream.str();
114
115 stream.str("");
116 stream << gpu_time;
117 cout << setw(10) << stream.str();
118
119 stream.str("");
120 stream << "x" << setprecision(3) << speedup;
121 cout << setw(14) << stream.str();
122
123 cout << cur_subtest_description_.str();
124 cout << resetiosflags(ios_base::left) << endl;
125 }
126
127
printError(const std::string & msg)128 void TestSystem::printError(const std::string& msg)
129 {
130 cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
131 }
132
133
gen(Mat & mat,int rows,int cols,int type,Scalar low,Scalar high)134 void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high)
135 {
136 mat.create(rows, cols, type);
137 RNG rng(0);
138 rng.fill(mat, RNG::UNIFORM, low, high);
139 }
140
141
// Prefixes relpath with the working directory configured in the test system.
// This is plain string concatenation: no separator is inserted here.
string abspath(const string& relpath)
{
    string full_path = TestSystem::instance().workingDir();
    full_path += relpath;
    return full_path;
}
146
147
cvErrorCallback(int,const char *,const char * err_msg,const char *,int,void *)148 static int cvErrorCallback(int /*status*/, const char* /*func_name*/,
149 const char* err_msg, const char* /*file_name*/,
150 int /*line*/, void* /*userdata*/)
151 {
152 TestSystem::instance().printError(err_msg);
153 return 0;
154 }
155
156
main(int argc,const char * argv[])157 int main(int argc, const char* argv[])
158 {
159 int num_devices = getCudaEnabledDeviceCount();
160 if (num_devices == 0)
161 {
162 cerr << "No GPU found or the library was compiled without CUDA support";
163 return -1;
164 }
165
166 redirectError(cvErrorCallback);
167
168 const char* keys =
169 "{ h help | | print help message }"
170 "{ f filter | | filter for test }"
171 "{ w workdir | | set working directory }"
172 "{ l list | | show all tests }"
173 "{ d device | 0 | device id }"
174 "{ i iters | 10 | iteration count }";
175
176 CommandLineParser cmd(argc, argv, keys);
177
178 if (cmd.has("help") || !cmd.check())
179 {
180 cmd.printMessage();
181 cmd.printErrors();
182 return 0;
183 }
184
185
186 int device = cmd.get<int>("device");
187 if (device < 0 || device >= num_devices)
188 {
189 cerr << "Invalid device ID" << endl;
190 return -1;
191 }
192 DeviceInfo dev_info(device);
193 if (!dev_info.isCompatible())
194 {
195 cerr << "CUDA module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
196 return -1;
197 }
198 setDevice(device);
199 printShortCudaDeviceInfo(device);
200
201 string filter = cmd.get<string>("filter");
202 string workdir = cmd.get<string>("workdir");
203 bool list = cmd.has("list");
204 int iters = cmd.get<int>("iters");
205
206 if (!filter.empty())
207 TestSystem::instance().setTestFilter(filter);
208
209 if (!workdir.empty())
210 {
211 if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
212 workdir += '/';
213
214 TestSystem::instance().setWorkingDir(workdir);
215 }
216
217 if (list)
218 TestSystem::instance().setListMode(true);
219
220 TestSystem::instance().setNumIters(iters);
221
222 cout << "\nNote: the timings for GPU don't include data transfer" << endl;
223
224 TestSystem::instance().run();
225
226 return 0;
227 }
228