1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #include "precomp.hpp"
44 
45 #if defined WIN32 || defined WINCE
46     #include <windows.h>
47     #undef small
48     #undef min
49     #undef max
50     #undef abs
51 #endif
52 
53 #if defined __linux__ || defined __APPLE__
54     #include <unistd.h>
55     #include <stdio.h>
56     #include <sys/types.h>
57     #if defined ANDROID
58         #include <sys/sysconf.h>
59     #elif defined __APPLE__
60         #include <sys/sysctl.h>
61     #endif
62 #endif
63 
64 #ifdef _OPENMP
65     #define HAVE_OPENMP
66 #endif
67 
68 #ifdef __APPLE__
69     #define HAVE_GCD
70 #endif
71 
72 #if defined _MSC_VER && _MSC_VER >= 1600
73     #define HAVE_CONCURRENCY
74 #endif
75 
/* IMPORTANT: always keep the defines in the same order
   1. HAVE_TBB         - 3rd-party library, should be explicitly enabled
   2. HAVE_CSTRIPES    - 3rd-party library, should be explicitly enabled
   3. HAVE_OPENMP      - integrated into the compiler, should be explicitly enabled
   4. HAVE_GCD         - system-wide, used automatically         (APPLE only)
   5. WINRT            - system-wide, used automatically         (Windows RT only)
   6. HAVE_CONCURRENCY - part of the runtime, used automatically (Windows only - MSVS 10, MSVS 11)
   7. HAVE_PTHREADS    - checked last, after all of the above
*/
84 
85 #if defined HAVE_TBB
86     #include "tbb/tbb_stddef.h"
87     #if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
88         #include "tbb/tbb.h"
89         #include "tbb/task.h"
90         #if TBB_INTERFACE_VERSION >= 6100
91             #include "tbb/task_arena.h"
92         #endif
93         #undef min
94         #undef max
95     #else
96         #undef HAVE_TBB
97     #endif // end TBB version
98 #endif
99 
100 #ifndef HAVE_TBB
101     #if defined HAVE_CSTRIPES
102         #include "C=.h"
103         #undef shared
104     #elif defined HAVE_OPENMP
105         #include <omp.h>
106     #elif defined HAVE_GCD
107         #include <dispatch/dispatch.h>
108         #include <pthread.h>
109     #elif defined WINRT
110         #include <ppltasks.h>
111     #elif defined HAVE_CONCURRENCY
112         #include <ppl.h>
113     #endif
114 #endif
115 
116 #if defined HAVE_TBB && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
117 #  define CV_PARALLEL_FRAMEWORK "tbb"
118 #elif defined HAVE_CSTRIPES
119 #  define CV_PARALLEL_FRAMEWORK "cstripes"
120 #elif defined HAVE_OPENMP
121 #  define CV_PARALLEL_FRAMEWORK "openmp"
122 #elif defined HAVE_GCD
123 #  define CV_PARALLEL_FRAMEWORK "gcd"
124 #elif defined WINRT
125 #  define CV_PARALLEL_FRAMEWORK "winrt-concurrency"
126 #elif defined HAVE_CONCURRENCY
127 #  define CV_PARALLEL_FRAMEWORK "ms-concurrency"
128 #elif defined HAVE_PTHREADS
129 #  define CV_PARALLEL_FRAMEWORK "pthreads"
130 #endif
131 
132 namespace cv
133 {
~ParallelLoopBody()134     ParallelLoopBody::~ParallelLoopBody() {}
135 }
136 
137 namespace
138 {
139 #ifdef CV_PARALLEL_FRAMEWORK
140     class ParallelLoopBodyWrapper
141     {
142     public:
ParallelLoopBodyWrapper(const cv::ParallelLoopBody & _body,const cv::Range & _r,double _nstripes)143         ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
144         {
145             body = &_body;
146             wholeRange = _r;
147             double len = wholeRange.end - wholeRange.start;
148             nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
149         }
operator ()(const cv::Range & sr) const150         void operator()(const cv::Range& sr) const
151         {
152             cv::Range r;
153             r.start = (int)(wholeRange.start +
154                             ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
155             r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
156                             ((uint64)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
157             (*body)(r);
158         }
stripeRange() const159         cv::Range stripeRange() const { return cv::Range(0, nstripes); }
160 
161     protected:
162         const cv::ParallelLoopBody* body;
163         cv::Range wholeRange;
164         int nstripes;
165     };
166 
167 #if defined HAVE_TBB
168     class ProxyLoopBody : public ParallelLoopBodyWrapper
169     {
170     public:
ProxyLoopBody(const cv::ParallelLoopBody & _body,const cv::Range & _r,double _nstripes)171         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
172         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
173         {}
174 
operator ()(const tbb::blocked_range<int> & range) const175         void operator ()(const tbb::blocked_range<int>& range) const
176         {
177             this->ParallelLoopBodyWrapper::operator()(cv::Range(range.begin(), range.end()));
178         }
179     };
180 #elif defined HAVE_CSTRIPES || defined HAVE_OPENMP
181     typedef ParallelLoopBodyWrapper ProxyLoopBody;
182 #elif defined HAVE_GCD
183     typedef ParallelLoopBodyWrapper ProxyLoopBody;
block_function(void * context,size_t index)184     static void block_function(void* context, size_t index)
185     {
186         ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
187         (*ptr_body)(cv::Range((int)index, (int)index + 1));
188     }
189 #elif defined WINRT || defined HAVE_CONCURRENCY
190     class ProxyLoopBody : public ParallelLoopBodyWrapper
191     {
192     public:
ProxyLoopBody(const cv::ParallelLoopBody & _body,const cv::Range & _r,double _nstripes)193         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
194         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
195         {}
196 
operator ()(int i) const197         void operator ()(int i) const
198         {
199             this->ParallelLoopBodyWrapper::operator()(cv::Range(i, i + 1));
200         }
201     };
202 #else
203     typedef ParallelLoopBodyWrapper ProxyLoopBody;
204 #endif
205 
206 static int numThreads = -1;
207 
208 #if defined HAVE_TBB
209 static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
210 #elif defined HAVE_CSTRIPES
211 // nothing for C=
212 #elif defined HAVE_OPENMP
213 static int numThreadsMax = omp_get_max_threads();
214 #elif defined HAVE_GCD
215 // nothing for GCD
216 #elif defined WINRT
217 // nothing for WINRT
218 #elif defined HAVE_CONCURRENCY
219 
220 class SchedPtr
221 {
222     Concurrency::Scheduler* sched_;
223 public:
operator ->()224     Concurrency::Scheduler* operator->() { return sched_; }
operator Concurrency::Scheduler*()225     operator Concurrency::Scheduler*() { return sched_; }
226 
operator =(Concurrency::Scheduler * sched)227     void operator=(Concurrency::Scheduler* sched)
228     {
229         if (sched_) sched_->Release();
230         sched_ = sched;
231     }
232 
SchedPtr()233     SchedPtr() : sched_(0) {}
~SchedPtr()234     ~SchedPtr() { *this = 0; }
235 };
236 static SchedPtr pplScheduler;
237 
238 #endif
239 
240 #endif // CV_PARALLEL_FRAMEWORK
241 
242 } //namespace
243 
244 /* ================================   parallel_for_  ================================ */
245 
/*
   Runs `body` over `range` using the parallel backend selected at compile time.

   range    - the index range to process
   body     - functor that is invoked with sub-ranges of `range`
   nstripes - requested number of stripes; it is interpreted by ProxyLoopBody
              (see ParallelLoopBodyWrapper's constructor)

   When no backend is compiled in (CV_PARALLEL_FRAMEWORK is undefined) or when
   numThreads == 0, the body is called once, directly, with the whole range.
*/
void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
{
#ifdef CV_PARALLEL_FRAMEWORK

    // numThreads == 0 disables the backend; that case is handled by the trailing `else` block
    if(numThreads != 0)
    {
        ProxyLoopBody pbody(body, range, nstripes);
        cv::Range stripeRange = pbody.stripeRange();
        // exactly one stripe: skip the backend and call the body directly
        if( stripeRange.end - stripeRange.start == 1 )
        {
            body(range);
            return;
        }

#if defined HAVE_TBB

        tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);

#elif defined HAVE_CSTRIPES

        parallel(MAX(0, numThreads))
        {
            int offset = stripeRange.start;
            int len = stripeRange.end - offset;
            Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len));
            pbody(r);
            barrier();
        }

#elif defined HAVE_OPENMP

        // one stripe per loop iteration
        #pragma omp parallel for schedule(dynamic)
        for (int i = stripeRange.start; i < stripeRange.end; ++i)
            pbody(Range(i, i + 1));

#elif defined HAVE_GCD

        // block_function receives &pbody as its context and the stripe index as its argument
        dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
        dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);

#elif defined WINRT

        Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);

#elif defined HAVE_CONCURRENCY

        // no custom scheduler, or the custom scheduler is already the current one: run as is
        if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id())
        {
            Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
        }
        else
        {
            // attach the scheduler created by setNumThreads for the duration of the loop
            pplScheduler->Attach();
            Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
            Concurrency::CurrentScheduler::Detach();
        }

#elif defined HAVE_PTHREADS
        // defined outside this view; it is given the original range/body/nstripes, not pbody
        void parallel_for_pthreads(const Range& range, const ParallelLoopBody& body, double nstripes);
        parallel_for_pthreads(range, body, nstripes);

#else

#error You have hacked and compiling with unsupported parallel framework

#endif

    }
    else

#endif // CV_PARALLEL_FRAMEWORK
    {
        // serial fallback: either the `else` of the check above or, without a framework, the whole function body
        (void)nstripes;
        body(range);
    }
}
322 
getNumThreads(void)323 int cv::getNumThreads(void)
324 {
325 #ifdef CV_PARALLEL_FRAMEWORK
326 
327     if(numThreads == 0)
328         return 1;
329 
330 #endif
331 
332 #if defined HAVE_TBB
333 
334     return tbbScheduler.is_active()
335            ? numThreads
336            : tbb::task_scheduler_init::default_num_threads();
337 
338 #elif defined HAVE_CSTRIPES
339 
340     return numThreads > 0
341             ? numThreads
342             : cv::getNumberOfCPUs();
343 
344 #elif defined HAVE_OPENMP
345 
346     return omp_get_max_threads();
347 
348 #elif defined HAVE_GCD
349 
350     return 512; // the GCD thread pool limit
351 
352 #elif defined WINRT
353 
354     return 0;
355 
356 #elif defined HAVE_CONCURRENCY
357 
358     return 1 + (pplScheduler == 0
359         ? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
360         : pplScheduler->GetNumberOfVirtualProcessors());
361 
362 #elif defined HAVE_PTHREADS
363 
364         size_t parallel_pthreads_get_threads_num();
365 
366         return parallel_pthreads_get_threads_num();
367 
368 #else
369 
370     return 1;
371 
372 #endif
373 }
374 
/*
   Sets the number of threads to be used by the compiled-in backend.

   threads - requested thread count. When a backend is compiled in, the value is
             stored in numThreads first (0 makes parallel_for_ call the body
             directly); the backend-specific part below then applies it where
             the backend allows it.

   Without a backend the call has no effect.
*/
void cv::setNumThreads( int threads )
{
    (void)threads;
#ifdef CV_PARALLEL_FRAMEWORK
    numThreads = threads;
#endif

#ifdef HAVE_TBB

    // restart the scheduler; for threads <= 0 it is left terminated
    if(tbbScheduler.is_active()) tbbScheduler.terminate();
    if(threads > 0) tbbScheduler.initialize(threads);

#elif defined HAVE_CSTRIPES

    return; // nothing needed

#elif defined HAVE_OPENMP

    if(omp_in_parallel())
        return; // can't change number of openmp threads inside a parallel region

    // threads <= 0 restores the maximum captured in numThreadsMax
    omp_set_num_threads(threads > 0 ? threads : numThreadsMax);

#elif defined HAVE_GCD

    // unsupported
    // there is only private dispatch_queue_set_width() and only for desktop

#elif defined WINRT

    return;

#elif defined HAVE_CONCURRENCY

    if (threads <= 0)
    {
        // drop the custom scheduler (SchedPtr releases it on assignment)
        pplScheduler = 0;
    }
    else if (threads == 1)
    {
        // Concurrency always uses >=2 threads, so we just disable it if 1 thread is requested
        numThreads = 0;
    }
    else if (pplScheduler == 0 || 1 + pplScheduler->GetNumberOfVirtualProcessors() != (unsigned int)threads)
    {
        // create a scheduler with exactly threads-1 virtual processors, unless the current one already matches
        pplScheduler = Concurrency::Scheduler::Create(Concurrency::SchedulerPolicy(2,
                       Concurrency::MinConcurrency, threads-1,
                       Concurrency::MaxConcurrency, threads-1));
    }

#elif defined HAVE_PTHREADS

    // defined outside this view
    void parallel_pthreads_set_threads_num(int num);

    parallel_pthreads_set_threads_num(threads);

#endif
}
433 
434 
getThreadNum(void)435 int cv::getThreadNum(void)
436 {
437 #if defined HAVE_TBB
438     #if TBB_INTERFACE_VERSION >= 6100 && defined TBB_PREVIEW_TASK_ARENA && TBB_PREVIEW_TASK_ARENA
439         return tbb::task_arena::current_slot();
440     #else
441         return 0;
442     #endif
443 #elif defined HAVE_CSTRIPES
444     return pix();
445 #elif defined HAVE_OPENMP
446     return omp_get_thread_num();
447 #elif defined HAVE_GCD
448     return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
449 #elif defined WINRT
450     return 0;
451 #elif defined HAVE_CONCURRENCY
452     return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for master thread, unique number for others but not necessary 1,2,3,...
453 #else
454     return 0;
455 #endif
456 }
457 
458 #ifdef ANDROID
// Counts the CPUs named by a list of the form "0-1,3,5-7,10,13-15".
// Tokens that do not start with a number (empty token, trailing newline) and
// ranges whose end precedes their start contribute nothing; a token with a
// number but no valid range end (e.g. "5-") counts as a single CPU.
static inline int countCPUsInList(const char* list)
{
    int count = 0;

    while(*list)
    {
        int rstart = 0, rend = 0;
        // sscanf stops by itself at the ',' that ends the token
        const int parsed = sscanf(list, "%d-%d", &rstart, &rend);
        if(parsed == 2)
        {
            if(rend >= rstart)
                count += rend - rstart + 1;
        }
        else if(parsed == 1)
        {
            ++count;
        }

        // advance to the token after the next ','
        while(*list && *list != ',')
            ++list;
        if(*list)
            ++list;
    }
    return count;
}

// Returns the number of CPUs listed in /sys/devices/system/cpu/possible.
// Falls back to 1 when the file cannot be opened or read, or when it lists no CPU.
static inline int getNumberOfCPUsImpl()
{
    FILE* cpuPossible = fopen("/sys/devices/system/cpu/possible", "r");
    if(!cpuPossible)
        return 1;

    char buf[2000]; //big enough for 1000 CPUs in worst possible configuration
    char* pbuf = fgets(buf, sizeof(buf), cpuPossible);
    fclose(cpuPossible);
    if(!pbuf)
        return 1;

    const int cpusAvailable = countCPUsInList(pbuf);
    return cpusAvailable > 0 ? cpusAvailable : 1;
}
496 #endif
497 
getNumberOfCPUs(void)498 int cv::getNumberOfCPUs(void)
499 {
500 #if defined WIN32 || defined _WIN32
501     SYSTEM_INFO sysinfo;
502 #if defined(_M_ARM) || defined(_M_X64) || defined(WINRT)
503     GetNativeSystemInfo( &sysinfo );
504 #else
505     GetSystemInfo( &sysinfo );
506 #endif
507 
508     return (int)sysinfo.dwNumberOfProcessors;
509 #elif defined ANDROID
510     static int ncpus = getNumberOfCPUsImpl();
511     return ncpus;
512 #elif defined __linux__
513     return (int)sysconf( _SC_NPROCESSORS_ONLN );
514 #elif defined __APPLE__
515     int numCPU=0;
516     int mib[4];
517     size_t len = sizeof(numCPU);
518 
519     /* set the mib for hw.ncpu */
520     mib[0] = CTL_HW;
521     mib[1] = HW_AVAILCPU;  // alternatively, try HW_NCPU;
522 
523     /* get the number of CPUs from the system */
524     sysctl(mib, 2, &numCPU, &len, NULL, 0);
525 
526     if( numCPU < 1 )
527     {
528         mib[1] = HW_NCPU;
529         sysctl( mib, 2, &numCPU, &len, NULL, 0 );
530 
531         if( numCPU < 1 )
532             numCPU = 1;
533     }
534 
535     return (int)numCPU;
536 #else
537     return 1;
538 #endif
539 }
540 
currentParallelFramework()541 const char* cv::currentParallelFramework() {
542 #ifdef CV_PARALLEL_FRAMEWORK
543     return CV_PARALLEL_FRAMEWORK;
544 #else
545     return NULL;
546 #endif
547 }
548 
cvSetNumThreads(int nt)549 CV_IMPL void cvSetNumThreads(int nt)
550 {
551     cv::setNumThreads(nt);
552 }
553 
cvGetNumThreads()554 CV_IMPL int cvGetNumThreads()
555 {
556     return cv::getNumThreads();
557 }
558 
cvGetThreadNum()559 CV_IMPL int cvGetThreadNum()
560 {
561     return cv::getThreadNum();
562 }
563