1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #ifndef _ncv_hpp_
44 #define _ncv_hpp_
45 
46 #include "opencv2/core/cvdef.h"
47 
48 #ifdef _WIN32
49     #define WIN32_LEAN_AND_MEAN
50 #endif
51 
52 #include <cuda_runtime.h>
53 #include "opencv2/core/cvstd.hpp"
54 #include "opencv2/core/utility.hpp"
55 
56 
57 //==============================================================================
58 //
59 // Compile-time assert functionality
60 //
61 //==============================================================================
62 
63 //! @addtogroup cudalegacy
64 //! @{
65 
66 /**
67 * Compile-time assert namespace
68 */
69 namespace NcvCTprep
70 {
71     template <bool x>
72     struct CT_ASSERT_FAILURE;
73 
74     template <>
75     struct CT_ASSERT_FAILURE<true> {};
76 
77     template <int x>
78     struct assertTest{};
79 }
80 
81 
82 #define NCV_CT_PREP_PASTE_AUX(a,b)      a##b                         ///< Concatenation indirection macro
83 #define NCV_CT_PREP_PASTE(a,b)          NCV_CT_PREP_PASTE_AUX(a, b)  ///< Concatenation macro
84 
85 
86 /**
87 * Performs compile-time assertion of a condition on the file scope
88 */
89 #define NCV_CT_ASSERT(X) \
90     typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \
91     NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)
92 
93 
94 
95 //==============================================================================
96 //
97 // Alignment macros
98 //
99 //==============================================================================
100 
101 
102 #if !defined(__align__) && !defined(__CUDACC__)
103     #if defined(_WIN32) || defined(_WIN64)
104         #define __align__(n)         __declspec(align(n))
105     #elif defined(__unix__)
106         #define __align__(n)         __attribute__((__aligned__(n)))
107     #endif
108 #endif
109 
110 
111 //==============================================================================
112 //
113 // Integral and compound types of guaranteed size
114 //
115 //==============================================================================
116 
117 
118 typedef               bool NcvBool;
119 typedef          long long Ncv64s;
120 
121 #if defined(__APPLE__) && !defined(__CUDACC__)
122     typedef uint64_t Ncv64u;
123 #else
124     typedef unsigned long long Ncv64u;
125 #endif
126 
127 typedef                int Ncv32s;
128 typedef       unsigned int Ncv32u;
129 typedef              short Ncv16s;
130 typedef     unsigned short Ncv16u;
131 typedef        signed char Ncv8s;
132 typedef      unsigned char Ncv8u;
133 typedef              float Ncv32f;
134 typedef             double Ncv64f;
135 
136 
137 struct NcvRect8u
138 {
139     Ncv8u x;
140     Ncv8u y;
141     Ncv8u width;
142     Ncv8u height;
NcvRect8uNcvRect8u143     __host__ __device__ NcvRect8u() : x(0), y(0), width(0), height(0) {};
NcvRect8uNcvRect8u144     __host__ __device__ NcvRect8u(Ncv8u x_, Ncv8u y_, Ncv8u width_, Ncv8u height_) : x(x_), y(y_), width(width_), height(height_) {}
145 };
146 
147 
148 struct NcvRect32s
149 {
150     Ncv32s x;          ///< x-coordinate of upper left corner.
151     Ncv32s y;          ///< y-coordinate of upper left corner.
152     Ncv32s width;      ///< Rectangle width.
153     Ncv32s height;     ///< Rectangle height.
NcvRect32sNcvRect32s154     __host__ __device__ NcvRect32s() : x(0), y(0), width(0), height(0) {};
NcvRect32sNcvRect32s155     __host__ __device__ NcvRect32s(Ncv32s x_, Ncv32s y_, Ncv32s width_, Ncv32s height_)
156         : x(x_), y(y_), width(width_), height(height_) {}
157 };
158 
159 
160 struct NcvRect32u
161 {
162     Ncv32u x;          ///< x-coordinate of upper left corner.
163     Ncv32u y;          ///< y-coordinate of upper left corner.
164     Ncv32u width;      ///< Rectangle width.
165     Ncv32u height;     ///< Rectangle height.
NcvRect32uNcvRect32u166     __host__ __device__ NcvRect32u() : x(0), y(0), width(0), height(0) {};
NcvRect32uNcvRect32u167     __host__ __device__ NcvRect32u(Ncv32u x_, Ncv32u y_, Ncv32u width_, Ncv32u height_)
168         : x(x_), y(y_), width(width_), height(height_) {}
169 };
170 
171 
172 struct NcvSize32s
173 {
174     Ncv32s width;  ///< Rectangle width.
175     Ncv32s height; ///< Rectangle height.
NcvSize32sNcvSize32s176     __host__ __device__ NcvSize32s() : width(0), height(0) {};
NcvSize32sNcvSize32s177     __host__ __device__ NcvSize32s(Ncv32s width_, Ncv32s height_) : width(width_), height(height_) {}
178 };
179 
180 
181 struct NcvSize32u
182 {
183     Ncv32u width;  ///< Rectangle width.
184     Ncv32u height; ///< Rectangle height.
NcvSize32uNcvSize32u185     __host__ __device__ NcvSize32u() : width(0), height(0) {};
NcvSize32uNcvSize32u186     __host__ __device__ NcvSize32u(Ncv32u width_, Ncv32u height_) : width(width_), height(height_) {}
operator ==NcvSize32u187     __host__ __device__ bool operator == (const NcvSize32u &another) const {return this->width == another.width && this->height == another.height;}
188 };
189 
190 
191 struct NcvPoint2D32s
192 {
193     Ncv32s x; ///< Point X.
194     Ncv32s y; ///< Point Y.
NcvPoint2D32sNcvPoint2D32s195     __host__ __device__ NcvPoint2D32s() : x(0), y(0) {};
NcvPoint2D32sNcvPoint2D32s196     __host__ __device__ NcvPoint2D32s(Ncv32s x_, Ncv32s y_) : x(x_), y(y_) {}
197 };
198 
199 
200 struct NcvPoint2D32u
201 {
202     Ncv32u x; ///< Point X.
203     Ncv32u y; ///< Point Y.
NcvPoint2D32uNcvPoint2D32u204     __host__ __device__ NcvPoint2D32u() : x(0), y(0) {};
NcvPoint2D32uNcvPoint2D32u205     __host__ __device__ NcvPoint2D32u(Ncv32u x_, Ncv32u y_) : x(x_), y(y_) {}
206 };
207 
208 //! @cond IGNORED
209 
210 NCV_CT_ASSERT(sizeof(NcvBool) <= 4);
211 NCV_CT_ASSERT(sizeof(Ncv64s) == 8);
212 NCV_CT_ASSERT(sizeof(Ncv64u) == 8);
213 NCV_CT_ASSERT(sizeof(Ncv32s) == 4);
214 NCV_CT_ASSERT(sizeof(Ncv32u) == 4);
215 NCV_CT_ASSERT(sizeof(Ncv16s) == 2);
216 NCV_CT_ASSERT(sizeof(Ncv16u) == 2);
217 NCV_CT_ASSERT(sizeof(Ncv8s) == 1);
218 NCV_CT_ASSERT(sizeof(Ncv8u) == 1);
219 NCV_CT_ASSERT(sizeof(Ncv32f) == 4);
220 NCV_CT_ASSERT(sizeof(Ncv64f) == 8);
221 NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
222 NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
223 NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
224 NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
225 NCV_CT_ASSERT(sizeof(NcvPoint2D32u) == 2 * sizeof(Ncv32u));
226 
227 //! @endcond
228 
229 //==============================================================================
230 //
231 // Persistent constants
232 //
233 //==============================================================================
234 
235 
236 const Ncv32u K_WARP_SIZE = 32;
237 const Ncv32u K_LOG2_WARP_SIZE = 5;
238 
239 
240 //==============================================================================
241 //
242 // Error handling
243 //
244 //==============================================================================
245 
246 
247 CV_EXPORTS void ncvDebugOutput(const cv::String &msg);
248 
249 
250 typedef void NCVDebugOutputHandler(const cv::String &msg);
251 
252 
253 CV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
254 
255 
256 #define ncvAssertPrintCheck(pred, msg) \
257     do \
258     { \
259         if (!(pred)) \
260         { \
261             cv::String str = cv::format("NCV Assertion Failed: %s, file=%s, line=%d", msg, __FILE__, __LINE__); \
262             ncvDebugOutput(str); \
263         } \
264     } while (0)
265 
266 
267 #define ncvAssertPrintReturn(pred, msg, err) \
268     do \
269     { \
270         ncvAssertPrintCheck(pred, msg); \
271         if (!(pred)) return err; \
272     } while (0)
273 
274 
275 #define ncvAssertReturn(pred, err) \
276     do \
277     { \
278         cv::String msg = cv::format("retcode=%d", (int)err); \
279         ncvAssertPrintReturn(pred, msg.c_str(), err); \
280     } while (0)
281 
282 
283 #define ncvAssertReturnNcvStat(ncvOp) \
284     do \
285     { \
286         NCVStatus _ncvStat = ncvOp; \
287         cv::String msg = cv::format("NcvStat=%d", (int)_ncvStat); \
288         ncvAssertPrintReturn(NCV_SUCCESS==_ncvStat, msg.c_str(), _ncvStat); \
289     } while (0)
290 
291 
292 #define ncvAssertCUDAReturn(cudacall, errCode) \
293     do \
294     { \
295         cudaError_t res = cudacall; \
296         cv::String msg = cv::format("cudaError_t=%d", (int)res); \
297         ncvAssertPrintReturn(cudaSuccess==res, msg.c_str(), errCode); \
298     } while (0)
299 
300 
301 #define ncvAssertCUDALastErrorReturn(errCode) \
302     do \
303     { \
304         cudaError_t res = cudaGetLastError(); \
305         cv::String msg = cv::format("cudaError_t=%d", (int)res); \
306         ncvAssertPrintReturn(cudaSuccess==res, msg.c_str(), errCode); \
307     } while (0)
308 
309 
310 /**
311 * Return-codes for status notification, errors and warnings
312 */
313 enum
314 {
315     //NCV statuses
316     NCV_SUCCESS,
317     NCV_UNKNOWN_ERROR,
318 
319     NCV_CUDA_ERROR,
320     NCV_NPP_ERROR,
321     NCV_FILE_ERROR,
322 
323     NCV_NULL_PTR,
324     NCV_INCONSISTENT_INPUT,
325     NCV_TEXTURE_BIND_ERROR,
326     NCV_DIMENSIONS_INVALID,
327 
328     NCV_INVALID_ROI,
329     NCV_INVALID_STEP,
330     NCV_INVALID_SCALE,
331 
332     NCV_ALLOCATOR_NOT_INITIALIZED,
333     NCV_ALLOCATOR_BAD_ALLOC,
334     NCV_ALLOCATOR_BAD_DEALLOC,
335     NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,
336     NCV_ALLOCATOR_DEALLOC_ORDER,
337     NCV_ALLOCATOR_BAD_REUSE,
338 
339     NCV_MEM_COPY_ERROR,
340     NCV_MEM_RESIDENCE_ERROR,
341     NCV_MEM_INSUFFICIENT_CAPACITY,
342 
343     NCV_HAAR_INVALID_PIXEL_STEP,
344     NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,
345     NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,
346     NCV_HAAR_TOO_LARGE_FEATURES,
347     NCV_HAAR_XML_LOADING_EXCEPTION,
348 
349     NCV_NOIMPL_HAAR_TILTED_FEATURES,
350     NCV_NOT_IMPLEMENTED,
351 
352     NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
353 
354     //NPP statuses
355     NPPST_SUCCESS = NCV_SUCCESS,              ///< Successful operation (same as NPP_NO_ERROR)
356     NPPST_ERROR,                              ///< Unknown error
357     NPPST_CUDA_KERNEL_EXECUTION_ERROR,        ///< CUDA kernel execution error
358     NPPST_NULL_POINTER_ERROR,                 ///< NULL pointer argument error
359     NPPST_TEXTURE_BIND_ERROR,                 ///< CUDA texture binding error or non-zero offset returned
360     NPPST_MEMCPY_ERROR,                       ///< CUDA memory copy error
361     NPPST_MEM_ALLOC_ERR,                      ///< CUDA memory allocation error
362     NPPST_MEMFREE_ERR,                        ///< CUDA memory deallocation error
363 
364     //NPPST statuses
365     NPPST_INVALID_ROI,                        ///< Invalid region of interest argument
366     NPPST_INVALID_STEP,                       ///< Invalid image lines step argument (check sign, alignment, relation to image width)
367     NPPST_INVALID_SCALE,                      ///< Invalid scale parameter passed
368     NPPST_MEM_INSUFFICIENT_BUFFER,            ///< Insufficient user-allocated buffer
369     NPPST_MEM_RESIDENCE_ERROR,                ///< Memory residence error detected (check if pointers should be device or pinned)
370     NPPST_MEM_INTERNAL_ERROR,                 ///< Internal memory management error
371 
372     NCV_LAST_STATUS                           ///< Marker to continue error numeration in other files
373 };
374 
375 
376 typedef Ncv32u NCVStatus;
377 
378 
379 #define NCV_SET_SKIP_COND(x) \
380     bool __ncv_skip_cond = x
381 
382 
383 #define NCV_RESET_SKIP_COND(x) \
384     __ncv_skip_cond = x
385 
386 
387 #define NCV_SKIP_COND_BEGIN \
388     if (!__ncv_skip_cond) {
389 
390 
391 #define NCV_SKIP_COND_END \
392     }
393 
394 
395 //==============================================================================
396 //
397 // Timer
398 //
399 //==============================================================================
400 
401 
402 typedef struct _NcvTimer *NcvTimer;
403 
404 CV_EXPORTS NcvTimer ncvStartTimer(void);
405 
406 CV_EXPORTS double ncvEndQueryTimerUs(NcvTimer t);
407 
408 CV_EXPORTS double ncvEndQueryTimerMs(NcvTimer t);
409 
410 
411 //==============================================================================
412 //
413 // Memory management classes template compound types
414 //
415 //==============================================================================
416 
417 
418 /**
419 * Calculates the aligned top bound value
420 */
421 CV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
422 
423 
424 /**
425 * NCVMemoryType
426 */
427 enum NCVMemoryType
428 {
429     NCVMemoryTypeNone,
430     NCVMemoryTypeHostPageable,
431     NCVMemoryTypeHostPinned,
432     NCVMemoryTypeDevice
433 };
434 
435 
436 /**
437 * NCVMemPtr
438 */
439 struct CV_EXPORTS NCVMemPtr
440 {
441     void *ptr;
442     NCVMemoryType memtype;
443     void clear();
444 };
445 
446 
447 /**
448 * NCVMemSegment
449 */
450 struct CV_EXPORTS NCVMemSegment
451 {
452     NCVMemPtr begin;
453     size_t size;
454     void clear();
455 };
456 
457 
458 /**
459 * INCVMemAllocator (Interface)
460 */
461 class CV_EXPORTS INCVMemAllocator
462 {
463 public:
464     virtual ~INCVMemAllocator() = 0;
465 
466     virtual NCVStatus alloc(NCVMemSegment &seg, size_t size) = 0;
467     virtual NCVStatus dealloc(NCVMemSegment &seg) = 0;
468 
469     virtual NcvBool isInitialized(void) const = 0;
470     virtual NcvBool isCounting(void) const = 0;
471 
472     virtual NCVMemoryType memType(void) const = 0;
473     virtual Ncv32u alignment(void) const = 0;
474     virtual size_t maxSize(void) const = 0;
475 };
476 
~INCVMemAllocator()477 inline INCVMemAllocator::~INCVMemAllocator() {}
478 
479 
480 /**
481 * NCVMemStackAllocator
482 */
483 class CV_EXPORTS NCVMemStackAllocator : public INCVMemAllocator
484 {
485     NCVMemStackAllocator();
486     NCVMemStackAllocator(const NCVMemStackAllocator &);
487 
488 public:
489 
490     explicit NCVMemStackAllocator(Ncv32u alignment);
491     NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr=NULL);
492     virtual ~NCVMemStackAllocator();
493 
494     virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
495     virtual NCVStatus dealloc(NCVMemSegment &seg);
496 
497     virtual NcvBool isInitialized(void) const;
498     virtual NcvBool isCounting(void) const;
499 
500     virtual NCVMemoryType memType(void) const;
501     virtual Ncv32u alignment(void) const;
502     virtual size_t maxSize(void) const;
503 
504 private:
505 
506     NCVMemoryType _memType;
507     Ncv32u _alignment;
508     Ncv8u *allocBegin;
509     Ncv8u *begin;
510     Ncv8u *end;
511     size_t currentSize;
512     size_t _maxSize;
513     NcvBool bReusesMemory;
514 };
515 
516 
517 /**
518 * NCVMemNativeAllocator
519 */
520 class CV_EXPORTS NCVMemNativeAllocator : public INCVMemAllocator
521 {
522 public:
523 
524     NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment);
525     virtual ~NCVMemNativeAllocator();
526 
527     virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
528     virtual NCVStatus dealloc(NCVMemSegment &seg);
529 
530     virtual NcvBool isInitialized(void) const;
531     virtual NcvBool isCounting(void) const;
532 
533     virtual NCVMemoryType memType(void) const;
534     virtual Ncv32u alignment(void) const;
535     virtual size_t maxSize(void) const;
536 
537 private:
538 
539     NCVMemNativeAllocator();
540     NCVMemNativeAllocator(const NCVMemNativeAllocator &);
541 
542     NCVMemoryType _memType;
543     Ncv32u _alignment;
544     size_t currentSize;
545     size_t _maxSize;
546 };
547 
548 
549 /**
550 * Copy dispatchers
551 */
552 CV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
553                                        const void *src, NCVMemoryType srcType,
554                                        size_t sz, cudaStream_t cuStream);
555 
556 
557 CV_EXPORTS NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
558                                          const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
559                                          Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream);
560 
561 
562 /**
563 * NCVVector (1D)
564 */
565 template <class T>
566 class NCVVector
567 {
568     NCVVector(const NCVVector &);
569 
570 public:
571 
NCVVector()572     NCVVector()
573     {
574         clear();
575     }
576 
~NCVVector()577     virtual ~NCVVector() {}
578 
clear()579     void clear()
580     {
581         _ptr = NULL;
582         _length = 0;
583         _memtype = NCVMemoryTypeNone;
584     }
585 
copySolid(NCVVector<T> & dst,cudaStream_t cuStream,size_t howMuch=0) const586     NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, size_t howMuch=0) const
587     {
588         if (howMuch == 0)
589         {
590             ncvAssertReturn(dst._length == this->_length, NCV_MEM_COPY_ERROR);
591             howMuch = this->_length * sizeof(T);
592         }
593         else
594         {
595             ncvAssertReturn(dst._length * sizeof(T) >= howMuch &&
596                 this->_length * sizeof(T) >= howMuch &&
597                 howMuch > 0, NCV_MEM_COPY_ERROR);
598         }
599         ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
600                         (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
601 
602         NCVStatus ncvStat = NCV_SUCCESS;
603         if (this->_memtype != NCVMemoryTypeNone)
604         {
605             ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
606                                        this->_ptr, this->_memtype,
607                                        howMuch, cuStream);
608         }
609 
610         return ncvStat;
611     }
612 
ptr() const613     T *ptr() const {return this->_ptr;}
length() const614     size_t length() const {return this->_length;}
memType() const615     NCVMemoryType memType() const {return this->_memtype;}
616 
617 protected:
618 
619     T *_ptr;
620     size_t _length;
621     NCVMemoryType _memtype;
622 };
623 
624 
625 /**
626 * NCVVectorAlloc
627 */
628 template <class T>
629 class NCVVectorAlloc : public NCVVector<T>
630 {
631     NCVVectorAlloc();
632     NCVVectorAlloc(const NCVVectorAlloc &);
633     NCVVectorAlloc& operator=(const NCVVectorAlloc<T>&);
634 
635 public:
636 
NCVVectorAlloc(INCVMemAllocator & allocator_,Ncv32u length_)637     NCVVectorAlloc(INCVMemAllocator &allocator_, Ncv32u length_)
638         :
639         allocator(allocator_)
640     {
641         NCVStatus ncvStat;
642 
643         this->clear();
644         this->allocatedMem.clear();
645 
646         ncvStat = allocator.alloc(this->allocatedMem, length_ * sizeof(T));
647         ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVVectorAlloc ctor:: alloc failed", );
648 
649         this->_ptr = (T *)this->allocatedMem.begin.ptr;
650         this->_length = length_;
651         this->_memtype = this->allocatedMem.begin.memtype;
652     }
653 
~NCVVectorAlloc()654     ~NCVVectorAlloc()
655     {
656         NCVStatus ncvStat;
657 
658         ncvStat = allocator.dealloc(this->allocatedMem);
659         ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVVectorAlloc dtor:: dealloc failed");
660 
661         this->clear();
662     }
663 
isMemAllocated() const664     NcvBool isMemAllocated() const
665     {
666         return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
667     }
668 
getAllocatorsAlignment() const669     Ncv32u getAllocatorsAlignment() const
670     {
671         return allocator.alignment();
672     }
673 
getSegment() const674     NCVMemSegment getSegment() const
675     {
676         return allocatedMem;
677     }
678 
679 private:
680     INCVMemAllocator &allocator;
681     NCVMemSegment allocatedMem;
682 };
683 
684 
685 /**
686 * NCVVectorReuse
687 */
688 template <class T>
689 class NCVVectorReuse : public NCVVector<T>
690 {
691     NCVVectorReuse();
692     NCVVectorReuse(const NCVVectorReuse &);
693 
694 public:
695 
NCVVectorReuse(const NCVMemSegment & memSegment)696     explicit NCVVectorReuse(const NCVMemSegment &memSegment)
697     {
698         this->bReused = false;
699         this->clear();
700 
701         this->_length = memSegment.size / sizeof(T);
702         this->_ptr = (T *)memSegment.begin.ptr;
703         this->_memtype = memSegment.begin.memtype;
704 
705         this->bReused = true;
706     }
707 
NCVVectorReuse(const NCVMemSegment & memSegment,Ncv32u length_)708     NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length_)
709     {
710         this->bReused = false;
711         this->clear();
712 
713         ncvAssertPrintReturn(length_ * sizeof(T) <= memSegment.size, \
714             "NCVVectorReuse ctor:: memory binding failed due to size mismatch", );
715 
716         this->_length = length_;
717         this->_ptr = (T *)memSegment.begin.ptr;
718         this->_memtype = memSegment.begin.memtype;
719 
720         this->bReused = true;
721     }
722 
isMemReused() const723     NcvBool isMemReused() const
724     {
725         return this->bReused;
726     }
727 
728 private:
729 
730     NcvBool bReused;
731 };
732 
733 
734 /**
735 * NCVMatrix (2D)
736 */
737 template <class T>
738 class NCVMatrix
739 {
740     NCVMatrix(const NCVMatrix &);
741 
742 public:
743 
NCVMatrix()744     NCVMatrix()
745     {
746         clear();
747     }
748 
~NCVMatrix()749     virtual ~NCVMatrix() {}
750 
clear()751     void clear()
752     {
753         _ptr = NULL;
754         _pitch = 0;
755         _width = 0;
756         _height = 0;
757         _memtype = NCVMemoryTypeNone;
758     }
759 
stride() const760     Ncv32u stride() const
761     {
762         return _pitch / sizeof(T);
763     }
764 
765     //a side effect of this function is that it copies everything in a single chunk, so the "padding" will be overwritten
copySolid(NCVMatrix<T> & dst,cudaStream_t cuStream,size_t howMuch=0) const766     NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, size_t howMuch=0) const
767     {
768         if (howMuch == 0)
769         {
770             ncvAssertReturn(dst._pitch == this->_pitch &&
771                             dst._height == this->_height, NCV_MEM_COPY_ERROR);
772             howMuch = this->_pitch * this->_height;
773         }
774         else
775         {
776             ncvAssertReturn(dst._pitch * dst._height >= howMuch &&
777                             this->_pitch * this->_height >= howMuch &&
778                             howMuch > 0, NCV_MEM_COPY_ERROR);
779         }
780         ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
781                         (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
782 
783         NCVStatus ncvStat = NCV_SUCCESS;
784         if (this->_memtype != NCVMemoryTypeNone)
785         {
786             ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
787                                        this->_ptr, this->_memtype,
788                                        howMuch, cuStream);
789         }
790 
791         return ncvStat;
792     }
793 
copy2D(NCVMatrix<T> & dst,NcvSize32u roi,cudaStream_t cuStream) const794     NCVStatus copy2D(NCVMatrix<T> &dst, NcvSize32u roi, cudaStream_t cuStream) const
795     {
796         ncvAssertReturn(this->width() >= roi.width && this->height() >= roi.height &&
797                         dst.width() >= roi.width && dst.height() >= roi.height, NCV_MEM_COPY_ERROR);
798         ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
799                         (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
800 
801         NCVStatus ncvStat = NCV_SUCCESS;
802         if (this->_memtype != NCVMemoryTypeNone)
803         {
804             ncvStat = memSegCopyHelper2D(dst._ptr, dst._pitch, dst._memtype,
805                                          this->_ptr, this->_pitch, this->_memtype,
806                                          roi.width * sizeof(T), roi.height, cuStream);
807         }
808 
809         return ncvStat;
810     }
811 
at(Ncv32u x,Ncv32u y) const812     T& at(Ncv32u x, Ncv32u y) const
813     {
814         NcvBool bOutRange = (x >= this->_width || y >= this->_height);
815         ncvAssertPrintCheck(!bOutRange, "Error addressing matrix");
816         if (bOutRange)
817         {
818             return *this->_ptr;
819         }
820         return ((T *)((Ncv8u *)this->_ptr + y * this->_pitch))[x];
821     }
822 
ptr() const823     T *ptr() const {return this->_ptr;}
width() const824     Ncv32u width() const {return this->_width;}
height() const825     Ncv32u height() const {return this->_height;}
size() const826     NcvSize32u size() const {return NcvSize32u(this->_width, this->_height);}
pitch() const827     Ncv32u pitch() const {return this->_pitch;}
memType() const828     NCVMemoryType memType() const {return this->_memtype;}
829 
830 protected:
831 
832     T *_ptr;
833     Ncv32u _width;
834     Ncv32u _height;
835     Ncv32u _pitch;
836     NCVMemoryType _memtype;
837 };
838 
839 
840 /**
841 * NCVMatrixAlloc
842 */
843 template <class T>
844 class NCVMatrixAlloc : public NCVMatrix<T>
845 {
846     NCVMatrixAlloc();
847     NCVMatrixAlloc(const NCVMatrixAlloc &);
848     NCVMatrixAlloc& operator=(const NCVMatrixAlloc &);
849 public:
850 
NCVMatrixAlloc(INCVMemAllocator & allocator_,Ncv32u width_,Ncv32u height_,Ncv32u pitch_=0)851     NCVMatrixAlloc(INCVMemAllocator &allocator_, Ncv32u width_, Ncv32u height_, Ncv32u pitch_=0)
852         :
853         allocator(allocator_)
854     {
855         NCVStatus ncvStat;
856 
857         this->clear();
858         this->allocatedMem.clear();
859 
860         Ncv32u widthBytes = width_ * sizeof(T);
861         Ncv32u pitchBytes = alignUp(widthBytes, allocator.alignment());
862 
863         if (pitch_ != 0)
864         {
865             ncvAssertPrintReturn(pitch_ >= pitchBytes &&
866                 (pitch_ & (allocator.alignment() - 1)) == 0,
867                 "NCVMatrixAlloc ctor:: incorrect pitch passed", );
868             pitchBytes = pitch_;
869         }
870 
871         Ncv32u requiredAllocSize = pitchBytes * height_;
872 
873         ncvStat = allocator.alloc(this->allocatedMem, requiredAllocSize);
874         ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc ctor:: alloc failed", );
875 
876         this->_ptr = (T *)this->allocatedMem.begin.ptr;
877         this->_width = width_;
878         this->_height = height_;
879         this->_pitch = pitchBytes;
880         this->_memtype = this->allocatedMem.begin.memtype;
881     }
882 
~NCVMatrixAlloc()883     ~NCVMatrixAlloc()
884     {
885         NCVStatus ncvStat;
886 
887         ncvStat = allocator.dealloc(this->allocatedMem);
888         ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc dtor:: dealloc failed");
889 
890         this->clear();
891     }
892 
isMemAllocated() const893     NcvBool isMemAllocated() const
894     {
895         return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
896     }
897 
getAllocatorsAlignment() const898     Ncv32u getAllocatorsAlignment() const
899     {
900         return allocator.alignment();
901     }
902 
getSegment() const903     NCVMemSegment getSegment() const
904     {
905         return allocatedMem;
906     }
907 
908 private:
909 
910     INCVMemAllocator &allocator;
911     NCVMemSegment allocatedMem;
912 };
913 
914 
915 /**
916 * NCVMatrixReuse
917 */
918 template <class T>
919 class NCVMatrixReuse : public NCVMatrix<T>
920 {
921     NCVMatrixReuse();
922     NCVMatrixReuse(const NCVMatrixReuse &);
923 
924 public:
925 
NCVMatrixReuse(const NCVMemSegment & memSegment,Ncv32u alignment,Ncv32u width_,Ncv32u height_,Ncv32u pitch_=0,NcvBool bSkipPitchCheck=false)926     NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width_, Ncv32u height_, Ncv32u pitch_=0, NcvBool bSkipPitchCheck=false)
927     {
928         this->bReused = false;
929         this->clear();
930 
931         Ncv32u widthBytes = width_ * sizeof(T);
932         Ncv32u pitchBytes = alignUp(widthBytes, alignment);
933 
934         if (pitch_ != 0)
935         {
936             if (!bSkipPitchCheck)
937             {
938                 ncvAssertPrintReturn(pitch_ >= pitchBytes &&
939                     (pitch_ & (alignment - 1)) == 0,
940                     "NCVMatrixReuse ctor:: incorrect pitch passed", );
941             }
942             else
943             {
944                 ncvAssertPrintReturn(pitch_ >= widthBytes, "NCVMatrixReuse ctor:: incorrect pitch passed", );
945             }
946             pitchBytes = pitch_;
947         }
948 
949         ncvAssertPrintReturn(pitchBytes * height_ <= memSegment.size, \
950             "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );
951 
952         this->_width = width_;
953         this->_height = height_;
954         this->_pitch = pitchBytes;
955         this->_ptr = (T *)memSegment.begin.ptr;
956         this->_memtype = memSegment.begin.memtype;
957 
958         this->bReused = true;
959     }
960 
NCVMatrixReuse(const NCVMatrix<T> & mat,NcvRect32u roi)961     NCVMatrixReuse(const NCVMatrix<T> &mat, NcvRect32u roi)
962     {
963         this->bReused = false;
964         this->clear();
965 
966         ncvAssertPrintReturn(roi.x < mat.width() && roi.y < mat.height() && \
967             roi.x + roi.width <= mat.width() && roi.y + roi.height <= mat.height(),
968             "NCVMatrixReuse ctor:: memory binding failed due to mismatching ROI and source matrix dims", );
969 
970         this->_width = roi.width;
971         this->_height = roi.height;
972         this->_pitch = mat.pitch();
973         this->_ptr = &mat.at(roi.x, roi.y);
974         this->_memtype = mat.memType();
975 
976         this->bReused = true;
977     }
978 
isMemReused() const979     NcvBool isMemReused() const
980     {
981         return this->bReused;
982     }
983 
984 private:
985 
986     NcvBool bReused;
987 };
988 
989 
990 /**
991 * Operations with rectangles
992 */
993 CV_EXPORTS NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses,
994                                               Ncv32u minNeighbors, Ncv32f intersectEps, NCVVector<Ncv32u> *hypothesesWeights);
995 
996 
997 CV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
998                                            NcvRect32u *h_rects, Ncv32u numRects, Ncv8u color);
999 
1000 
1001 CV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
1002                                             NcvRect32u *h_rects, Ncv32u numRects, Ncv32u color);
1003 
1004 
1005 CV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
1006                                              NcvRect32u *d_rects, Ncv32u numRects, Ncv8u color, cudaStream_t cuStream);
1007 
1008 
1009 CV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
1010                                               NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
1011 
1012 
1013 #define CLAMP(x,a,b)        ( (x) > (b) ? (b) : ( (x) < (a) ? (a) : (x) ) )
1014 #define CLAMP_TOP(x, a)     (((x) > (a)) ? (a) : (x))
1015 #define CLAMP_BOTTOM(x, a)  (((x) < (a)) ? (a) : (x))
1016 #define CLAMP_0_255(x)      CLAMP(x,0,255)
1017 
1018 
1019 #define SUB_BEGIN(type, name)    struct { __inline type name
1020 #define SUB_END(name)            } name;
1021 #define SUB_CALL(name)           name.name
1022 
1023 #define SQR(x)              ((x)*(x))
1024 
1025 
1026 #define ncvSafeMatAlloc(name, type, alloc, width, height, err) \
1027     NCVMatrixAlloc<type> name(alloc, width, height); \
1028     ncvAssertReturn(name.isMemAllocated(), err);
1029 
1030 //! @}
1031 
1032 #endif // _ncv_hpp_
1033