1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
22 //
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
26 //
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44
45 #if defined WIN32 || defined WINCE
46 #include <windows.h>
47 #undef small
48 #undef min
49 #undef max
50 #undef abs
51 #endif
52
53 #if defined __linux__ || defined __APPLE__
54 #include <unistd.h>
55 #include <stdio.h>
56 #include <sys/types.h>
57 #if defined ANDROID
58 #include <sys/sysconf.h>
59 #elif defined __APPLE__
60 #include <sys/sysctl.h>
61 #endif
62 #endif
63
64 #ifdef _OPENMP
65 #define HAVE_OPENMP
66 #endif
67
68 #ifdef __APPLE__
69 #define HAVE_GCD
70 #endif
71
72 #if defined _MSC_VER && _MSC_VER >= 1600
73 #define HAVE_CONCURRENCY
74 #endif
75
76 /* IMPORTANT: always use the same order of defines
77 1. HAVE_TBB - 3rdparty library, should be explicitly enabled
78 2. HAVE_CSTRIPES - 3rdparty library, should be explicitly enabled
79 3. HAVE_OPENMP - integrated to compiler, should be explicitly enabled
80 4. HAVE_GCD - system wide, used automatically (APPLE only)
81 5. WINRT - system wide, used automatically (Windows RT only)
82 6. HAVE_CONCURRENCY - part of runtime, used automatically (Windows only - MSVS 10, MSVS 11)
83 */
84
85 #if defined HAVE_TBB
86 #include "tbb/tbb_stddef.h"
87 #if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
88 #include "tbb/tbb.h"
89 #include "tbb/task.h"
90 #if TBB_INTERFACE_VERSION >= 6100
91 #include "tbb/task_arena.h"
92 #endif
93 #undef min
94 #undef max
95 #else
96 #undef HAVE_TBB
97 #endif // end TBB version
98 #endif
99
100 #ifndef HAVE_TBB
101 #if defined HAVE_CSTRIPES
102 #include "C=.h"
103 #undef shared
104 #elif defined HAVE_OPENMP
105 #include <omp.h>
106 #elif defined HAVE_GCD
107 #include <dispatch/dispatch.h>
108 #include <pthread.h>
109 #elif defined WINRT
110 #include <ppltasks.h>
111 #elif defined HAVE_CONCURRENCY
112 #include <ppl.h>
113 #endif
114 #endif
115
116 #if defined HAVE_TBB && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
117 # define CV_PARALLEL_FRAMEWORK "tbb"
118 #elif defined HAVE_CSTRIPES
119 # define CV_PARALLEL_FRAMEWORK "cstripes"
120 #elif defined HAVE_OPENMP
121 # define CV_PARALLEL_FRAMEWORK "openmp"
122 #elif defined HAVE_GCD
123 # define CV_PARALLEL_FRAMEWORK "gcd"
124 #elif defined WINRT
125 # define CV_PARALLEL_FRAMEWORK "winrt-concurrency"
126 #elif defined HAVE_CONCURRENCY
127 # define CV_PARALLEL_FRAMEWORK "ms-concurrency"
128 #elif defined HAVE_PTHREADS
129 # define CV_PARALLEL_FRAMEWORK "pthreads"
130 #endif
131
132 namespace cv
133 {
~ParallelLoopBody()134 ParallelLoopBody::~ParallelLoopBody() {}
135 }
136
137 namespace
138 {
139 #ifdef CV_PARALLEL_FRAMEWORK
140 class ParallelLoopBodyWrapper
141 {
142 public:
ParallelLoopBodyWrapper(const cv::ParallelLoopBody & _body,const cv::Range & _r,double _nstripes)143 ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
144 {
145 body = &_body;
146 wholeRange = _r;
147 double len = wholeRange.end - wholeRange.start;
148 nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
149 }
operator ()(const cv::Range & sr) const150 void operator()(const cv::Range& sr) const
151 {
152 cv::Range r;
153 r.start = (int)(wholeRange.start +
154 ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
155 r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
156 ((uint64)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
157 (*body)(r);
158 }
stripeRange() const159 cv::Range stripeRange() const { return cv::Range(0, nstripes); }
160
161 protected:
162 const cv::ParallelLoopBody* body;
163 cv::Range wholeRange;
164 int nstripes;
165 };
166
167 #if defined HAVE_TBB
168 class ProxyLoopBody : public ParallelLoopBodyWrapper
169 {
170 public:
ProxyLoopBody(const cv::ParallelLoopBody & _body,const cv::Range & _r,double _nstripes)171 ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
172 : ParallelLoopBodyWrapper(_body, _r, _nstripes)
173 {}
174
operator ()(const tbb::blocked_range<int> & range) const175 void operator ()(const tbb::blocked_range<int>& range) const
176 {
177 this->ParallelLoopBodyWrapper::operator()(cv::Range(range.begin(), range.end()));
178 }
179 };
180 #elif defined HAVE_CSTRIPES || defined HAVE_OPENMP
181 typedef ParallelLoopBodyWrapper ProxyLoopBody;
182 #elif defined HAVE_GCD
183 typedef ParallelLoopBodyWrapper ProxyLoopBody;
block_function(void * context,size_t index)184 static void block_function(void* context, size_t index)
185 {
186 ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
187 (*ptr_body)(cv::Range((int)index, (int)index + 1));
188 }
189 #elif defined WINRT || defined HAVE_CONCURRENCY
190 class ProxyLoopBody : public ParallelLoopBodyWrapper
191 {
192 public:
ProxyLoopBody(const cv::ParallelLoopBody & _body,const cv::Range & _r,double _nstripes)193 ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
194 : ParallelLoopBodyWrapper(_body, _r, _nstripes)
195 {}
196
operator ()(int i) const197 void operator ()(int i) const
198 {
199 this->ParallelLoopBodyWrapper::operator()(cv::Range(i, i + 1));
200 }
201 };
202 #else
203 typedef ParallelLoopBodyWrapper ProxyLoopBody;
204 #endif
205
// Thread count last passed to cv::setNumThreads(); -1 until it is called.
// A value of 0 makes cv::parallel_for_ run the body directly on the calling thread.
static int numThreads = -1;

#if defined HAVE_TBB
// Created in deferred mode; it is only initialized from cv::setNumThreads().
static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
#elif defined HAVE_CSTRIPES
// nothing for C=
#elif defined HAVE_OPENMP
// Value of omp_get_max_threads() captured at static-initialization time.
static int numThreadsMax = omp_get_max_threads();
#elif defined HAVE_GCD
// nothing for GCD
#elif defined WINRT
// nothing for WINRT
#elif defined HAVE_CONCURRENCY

// Minimal holder for a Concurrency::Scheduler*: Release() is called on the held
// scheduler whenever it is replaced and when the holder is destroyed.
class SchedPtr
{
public:
    SchedPtr() : sched_(0) {}
    ~SchedPtr() { operator=(0); }

    // Releases the currently held scheduler (if any) and takes over `sched`.
    void operator=(Concurrency::Scheduler* sched)
    {
        if (sched_)
            sched_->Release();
        sched_ = sched;
    }

    operator Concurrency::Scheduler*() { return sched_; }
    Concurrency::Scheduler* operator->() { return sched_; }

private:
    Concurrency::Scheduler* sched_;
};
static SchedPtr pplScheduler;

#endif
239
240 #endif // CV_PARALLEL_FRAMEWORK
241
242 } //namespace
243
244 /* ================================ parallel_for_ ================================ */
245
parallel_for_(const cv::Range & range,const cv::ParallelLoopBody & body,double nstripes)246 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
247 {
248 #ifdef CV_PARALLEL_FRAMEWORK
249
250 if(numThreads != 0)
251 {
252 ProxyLoopBody pbody(body, range, nstripes);
253 cv::Range stripeRange = pbody.stripeRange();
254 if( stripeRange.end - stripeRange.start == 1 )
255 {
256 body(range);
257 return;
258 }
259
260 #if defined HAVE_TBB
261
262 tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);
263
264 #elif defined HAVE_CSTRIPES
265
266 parallel(MAX(0, numThreads))
267 {
268 int offset = stripeRange.start;
269 int len = stripeRange.end - offset;
270 Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len));
271 pbody(r);
272 barrier();
273 }
274
275 #elif defined HAVE_OPENMP
276
277 #pragma omp parallel for schedule(dynamic)
278 for (int i = stripeRange.start; i < stripeRange.end; ++i)
279 pbody(Range(i, i + 1));
280
281 #elif defined HAVE_GCD
282
283 dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
284 dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
285
286 #elif defined WINRT
287
288 Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
289
290 #elif defined HAVE_CONCURRENCY
291
292 if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id())
293 {
294 Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
295 }
296 else
297 {
298 pplScheduler->Attach();
299 Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
300 Concurrency::CurrentScheduler::Detach();
301 }
302
303 #elif defined HAVE_PTHREADS
304 void parallel_for_pthreads(const Range& range, const ParallelLoopBody& body, double nstripes);
305 parallel_for_pthreads(range, body, nstripes);
306
307 #else
308
309 #error You have hacked and compiling with unsupported parallel framework
310
311 #endif
312
313 }
314 else
315
316 #endif // CV_PARALLEL_FRAMEWORK
317 {
318 (void)nstripes;
319 body(range);
320 }
321 }
322
getNumThreads(void)323 int cv::getNumThreads(void)
324 {
325 #ifdef CV_PARALLEL_FRAMEWORK
326
327 if(numThreads == 0)
328 return 1;
329
330 #endif
331
332 #if defined HAVE_TBB
333
334 return tbbScheduler.is_active()
335 ? numThreads
336 : tbb::task_scheduler_init::default_num_threads();
337
338 #elif defined HAVE_CSTRIPES
339
340 return numThreads > 0
341 ? numThreads
342 : cv::getNumberOfCPUs();
343
344 #elif defined HAVE_OPENMP
345
346 return omp_get_max_threads();
347
348 #elif defined HAVE_GCD
349
350 return 512; // the GCD thread pool limit
351
352 #elif defined WINRT
353
354 return 0;
355
356 #elif defined HAVE_CONCURRENCY
357
358 return 1 + (pplScheduler == 0
359 ? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
360 : pplScheduler->GetNumberOfVirtualProcessors());
361
362 #elif defined HAVE_PTHREADS
363
364 size_t parallel_pthreads_get_threads_num();
365
366 return parallel_pthreads_get_threads_num();
367
368 #else
369
370 return 1;
371
372 #endif
373 }
374
setNumThreads(int threads)375 void cv::setNumThreads( int threads )
376 {
377 (void)threads;
378 #ifdef CV_PARALLEL_FRAMEWORK
379 numThreads = threads;
380 #endif
381
382 #ifdef HAVE_TBB
383
384 if(tbbScheduler.is_active()) tbbScheduler.terminate();
385 if(threads > 0) tbbScheduler.initialize(threads);
386
387 #elif defined HAVE_CSTRIPES
388
389 return; // nothing needed
390
391 #elif defined HAVE_OPENMP
392
393 if(omp_in_parallel())
394 return; // can't change number of openmp threads inside a parallel region
395
396 omp_set_num_threads(threads > 0 ? threads : numThreadsMax);
397
398 #elif defined HAVE_GCD
399
400 // unsupported
401 // there is only private dispatch_queue_set_width() and only for desktop
402
403 #elif defined WINRT
404
405 return;
406
407 #elif defined HAVE_CONCURRENCY
408
409 if (threads <= 0)
410 {
411 pplScheduler = 0;
412 }
413 else if (threads == 1)
414 {
415 // Concurrency always uses >=2 threads, so we just disable it if 1 thread is requested
416 numThreads = 0;
417 }
418 else if (pplScheduler == 0 || 1 + pplScheduler->GetNumberOfVirtualProcessors() != (unsigned int)threads)
419 {
420 pplScheduler = Concurrency::Scheduler::Create(Concurrency::SchedulerPolicy(2,
421 Concurrency::MinConcurrency, threads-1,
422 Concurrency::MaxConcurrency, threads-1));
423 }
424
425 #elif defined HAVE_PTHREADS
426
427 void parallel_pthreads_set_threads_num(int num);
428
429 parallel_pthreads_set_threads_num(threads);
430
431 #endif
432 }
433
434
getThreadNum(void)435 int cv::getThreadNum(void)
436 {
437 #if defined HAVE_TBB
438 #if TBB_INTERFACE_VERSION >= 6100 && defined TBB_PREVIEW_TASK_ARENA && TBB_PREVIEW_TASK_ARENA
439 return tbb::task_arena::current_slot();
440 #else
441 return 0;
442 #endif
443 #elif defined HAVE_CSTRIPES
444 return pix();
445 #elif defined HAVE_OPENMP
446 return omp_get_thread_num();
447 #elif defined HAVE_GCD
448 return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
449 #elif defined WINRT
450 return 0;
451 #elif defined HAVE_CONCURRENCY
452 return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for master thread, unique number for others but not necessary 1,2,3,...
453 #else
454 return 0;
455 #endif
456 }
457
458 #ifdef ANDROID
// Counts the CPUs listed in /sys/devices/system/cpu/possible.
// The first line of the file is a comma-separated list of CPU ids and inclusive id
// ranges, e.g. "0-1,3,5-7,10,13-15".
// Returns the number of listed CPUs, or 1 when the file cannot be opened or read or
// when no valid entry is found. The result is never less than 1.
static inline int getNumberOfCPUsImpl()
{
    FILE* cpuPossible = fopen("/sys/devices/system/cpu/possible", "r");
    if(!cpuPossible)
        return 1;

    char buf[2000]; //big enough for 1000 CPUs in worst possible configuration
    char* pbuf = fgets(buf, sizeof(buf), cpuPossible);
    fclose(cpuPossible);
    if(!pbuf)
        return 1;

    //parse string of form "0-1,3,5-7,10,13-15"
    int cpusAvailable = 0;

    while(*pbuf)
    {
        // Cut the next comma-separated token out of the buffer in place.
        const char* token = pbuf;
        while(*pbuf && *pbuf != ',')
            ++pbuf;
        if(*pbuf)
            *pbuf++ = 0;

        int rstart = 0, rend = 0;
        const int parsed = sscanf(token, "%d-%d", &rstart, &rend);
        if(parsed == 2)
        {
            // "a-b": inclusive range; a reversed range contributes nothing
            // instead of a negative amount.
            if(rend >= rstart)
                cpusAvailable += rend - rstart + 1;
        }
        else if(parsed == 1)
        {
            // single CPU id (also covers a truncated range such as "3-")
            ++cpusAvailable;
        }
        // otherwise the token is empty or not numeric (e.g. the trailing newline) - skip it
    }

    return cpusAvailable > 0 ? cpusAvailable : 1;
}
496 #endif
497
getNumberOfCPUs(void)498 int cv::getNumberOfCPUs(void)
499 {
500 #if defined WIN32 || defined _WIN32
501 SYSTEM_INFO sysinfo;
502 #if defined(_M_ARM) || defined(_M_X64) || defined(WINRT)
503 GetNativeSystemInfo( &sysinfo );
504 #else
505 GetSystemInfo( &sysinfo );
506 #endif
507
508 return (int)sysinfo.dwNumberOfProcessors;
509 #elif defined ANDROID
510 static int ncpus = getNumberOfCPUsImpl();
511 return ncpus;
512 #elif defined __linux__
513 return (int)sysconf( _SC_NPROCESSORS_ONLN );
514 #elif defined __APPLE__
515 int numCPU=0;
516 int mib[4];
517 size_t len = sizeof(numCPU);
518
519 /* set the mib for hw.ncpu */
520 mib[0] = CTL_HW;
521 mib[1] = HW_AVAILCPU; // alternatively, try HW_NCPU;
522
523 /* get the number of CPUs from the system */
524 sysctl(mib, 2, &numCPU, &len, NULL, 0);
525
526 if( numCPU < 1 )
527 {
528 mib[1] = HW_NCPU;
529 sysctl( mib, 2, &numCPU, &len, NULL, 0 );
530
531 if( numCPU < 1 )
532 numCPU = 1;
533 }
534
535 return (int)numCPU;
536 #else
537 return 1;
538 #endif
539 }
540
currentParallelFramework()541 const char* cv::currentParallelFramework() {
542 #ifdef CV_PARALLEL_FRAMEWORK
543 return CV_PARALLEL_FRAMEWORK;
544 #else
545 return NULL;
546 #endif
547 }
548
cvSetNumThreads(int nt)549 CV_IMPL void cvSetNumThreads(int nt)
550 {
551 cv::setNumThreads(nt);
552 }
553
cvGetNumThreads()554 CV_IMPL int cvGetNumThreads()
555 {
556 return cv::getNumThreads();
557 }
558
cvGetThreadNum()559 CV_IMPL int cvGetThreadNum()
560 {
561 return cv::getThreadNum();
562 }
563