/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencv2/objdetect/objdetect_c.h"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }

#else

//
// CascadeClassifierBase
//

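// Shared parameter storage for both detector implementations. The constructor below sets
// the defaults: scaleFactor = 1.2, minNeighbors = 4, findLargestObject = false,
// maxNumObjects = 100, and empty min/max object sizes.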
namespace
{
    class CascadeClassifierBase : public cuda::CascadeClassifier
    {
    public:
        CascadeClassifierBase();

        virtual void setMaxObjectSize(Size maxObjectSize) { maxObjectSize_ = maxObjectSize; }
        virtual Size getMaxObjectSize() const { return maxObjectSize_; }

        virtual void setMinObjectSize(Size minSize) { minObjectSize_ = minSize; }
        virtual Size getMinObjectSize() const { return minObjectSize_; }

        virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
        virtual double getScaleFactor() const { return scaleFactor_; }

        virtual void setMinNeighbors(int minNeighbors) { minNeighbors_ = minNeighbors; }
        virtual int getMinNeighbors() const { return minNeighbors_; }

        virtual void setFindLargestObject(bool findLargestObject) { findLargestObject_ = findLargestObject; }
        virtual bool getFindLargestObject() { return findLargestObject_; }

        virtual void setMaxNumObjects(int maxNumObjects) { maxNumObjects_ = maxNumObjects; }
        virtual int getMaxNumObjects() const { return maxNumObjects_; }

    protected:
        Size maxObjectSize_;
        Size minObjectSize_;
        double scaleFactor_;
        int minNeighbors_;
        bool findLargestObject_;
        int maxNumObjects_;
    };

    CascadeClassifierBase::CascadeClassifierBase() :
        maxObjectSize_(),
        minObjectSize_(),
        scaleFactor_(1.2),
        minNeighbors_(4),
        findLargestObject_(false),
        maxNumObjects_(100)
    {
    }
}

//
// HaarCascade
//

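// Haar cascades are handled by the NCV-based detector from the cudalegacy module, so this
// implementation is only compiled when OpenCV was built with HAVE_OPENCV_CUDALEGACY.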
#ifdef HAVE_OPENCV_CUDALEGACY

namespace
{
    class HaarCascade_Impl : public CascadeClassifierBase
    {
    public:
        explicit HaarCascade_Impl(const String& filename);

        virtual Size getClassifierSize() const;

        virtual void detectMultiScale(InputArray image,
                                      OutputArray objects,
                                      Stream& stream);

        virtual void convert(OutputArray gpu_objects,
                             std::vector<Rect>& objects);

    private:
        NCVStatus load(const String& classifierFile);
        NCVStatus calculateMemReqsAndAllocate(const Size& frameSize);
        NCVStatus process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections);

        Size lastAllocatedFrameSize;

        Ptr<NCVMemStackAllocator> gpuAllocator;
        Ptr<NCVMemStackAllocator> cpuAllocator;

        cudaDeviceProp devProp;
        NCVStatus ncvStat;

        Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
        Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;

        Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
        Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
        Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures;

        HaarClassifierCascadeDescriptor haar;

        Ptr<NCVVectorAlloc<HaarStage64> > d_haarStages;
        Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
        Ptr<NCVVectorAlloc<HaarFeature64> > d_haarFeatures;
    };

    static void NCVDebugOutputHandler(const String &msg)
    {
        CV_Error(Error::GpuApiCallError, msg.c_str());
    }

    HaarCascade_Impl::HaarCascade_Impl(const String& filename) :
        lastAllocatedFrameSize(-1, -1)
    {
        ncvSetDebugOutputHandler(NCVDebugOutputHandler);
        ncvSafeCall( load(filename) );
    }

    Size HaarCascade_Impl::getClassifierSize() const
    {
        return Size(haar.ClassifierSize.width, haar.ClassifierSize.height);
    }

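    // detectMultiScale: requires an 8-bit input and the default stream, picks the NCV
    // minimum window from the classifier's native size and the requested minimum object
    // size, runs process() into a BufferPool row of maxNumObjects_ Rects, and copies the
    // detected prefix of that row to _objects.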
    void HaarCascade_Impl::detectMultiScale(InputArray _image,
                                            OutputArray _objects,
                                            Stream& stream)
    {
        const GpuMat image = _image.getGpuMat();

        CV_Assert( image.depth() == CV_8U);
        CV_Assert( scaleFactor_ > 1 );
        CV_Assert( !stream );

        Size ncvMinSize = getClassifierSize();
        if (ncvMinSize.width < minObjectSize_.width && ncvMinSize.height < minObjectSize_.height)
        {
            ncvMinSize.width = minObjectSize_.width;
            ncvMinSize.height = minObjectSize_.height;
        }

        BufferPool pool(stream);
        GpuMat objectsBuf = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);

        unsigned int numDetections;
        ncvSafeCall( process(image, objectsBuf, ncvMinSize, numDetections) );

        if (numDetections > 0)
        {
            objectsBuf.colRange(0, numDetections).copyTo(_objects);
        }
        else
        {
            _objects.release();
        }
    }

    void HaarCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
    {
        if (_gpu_objects.empty())
        {
            objects.clear();
            return;
        }

        Mat gpu_objects;
        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_objects.getGpuMat().download(gpu_objects);
        }
        else
        {
            gpu_objects = _gpu_objects.getMat();
        }

        CV_Assert( gpu_objects.rows == 1 );
        CV_Assert( gpu_objects.type() == DataType<Rect>::type );

        Rect* ptr = gpu_objects.ptr<Rect>();
        objects.assign(ptr, ptr + gpu_objects.cols);
    }

    NCVStatus HaarCascade_Impl::load(const String& classifierFile)
    {
        int devId = cv::cuda::getDevice();
        ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);

        // Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
        gpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeDevice, static_cast<int>(devProp.textureAlignment));
        cpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeHostPinned, static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);

        Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
        ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", NCV_FILE_ERROR);

        h_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*cpuCascadeAllocator, haarNumStages));
        h_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*cpuCascadeAllocator, haarNumNodes));
        h_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*cpuCascadeAllocator, haarNumFeatures));

        ncvAssertPrintReturn(h_haarStages->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(h_haarNodes->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(h_haarFeatures->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);

        ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, *h_haarStages, *h_haarNodes, *h_haarFeatures);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", NCV_FILE_ERROR);

        d_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*gpuCascadeAllocator, haarNumStages));
        d_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*gpuCascadeAllocator, haarNumNodes));
        d_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*gpuCascadeAllocator, haarNumFeatures));

        ncvAssertPrintReturn(d_haarStages->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(d_haarNodes->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(d_haarFeatures->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);

        ncvStat = h_haarStages->copySolid(*d_haarStages, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
        ncvStat = h_haarNodes->copySolid(*d_haarNodes, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
        ncvStat = h_haarFeatures->copySolid(*d_haarFeatures, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);

        return NCV_SUCCESS;
    }

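    // Working memory is sized with counting allocators: a dry run of
    // ncvDetectObjectsMultiScale_device records the peak usage for the given frame size,
    // and the real stack allocators are then created with exactly that capacity. The
    // result is cached via lastAllocatedFrameSize so repeated calls with the same frame
    // size are cheap.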
    NCVStatus HaarCascade_Impl::calculateMemReqsAndAllocate(const Size& frameSize)
    {
        if (lastAllocatedFrameSize == frameSize)
        {
            return NCV_SUCCESS;
        }

        // Calculate memory requirements and create real allocators
        NCVMemStackAllocator gpuCounter(static_cast<int>(devProp.textureAlignment));
        NCVMemStackAllocator cpuCounter(static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", NCV_CUDA_ERROR);

        NCVMatrixAlloc<Ncv8u> d_src(gpuCounter, frameSize.width, frameSize.height);
        NCVMatrixAlloc<Ncv8u> h_src(cpuCounter, frameSize.width, frameSize.height);

        ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
        ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

        NCVVectorAlloc<NcvRect32u> d_rects(gpuCounter, 100);
        ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

        NcvSize32u roi;
        roi.width = d_src.width();
        roi.height = d_src.height();
        Ncv32u numDetections;
        ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages,
            *d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp, 0);

        ncvAssertReturnNcvStat(ncvStat);
        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

        gpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
        cpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuAllocator->isInitialized(), "Error creating GPU memory allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR);

        lastAllocatedFrameSize = frameSize;
        return NCV_SUCCESS;
    }

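    // process() wraps the GpuMat frame and the output row of rectangles into NCV reuse
    // views (no extra copies), honours the findLargestObject flag, and runs the NCV
    // multi-scale detector on the default stream.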
    NCVStatus HaarCascade_Impl::process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections)
    {
        calculateMemReqsAndAllocate(src.size());

        NCVMemPtr src_beg;
        src_beg.ptr = (void*)src.ptr<Ncv8u>();
        src_beg.memtype = NCVMemoryTypeDevice;

        NCVMemSegment src_seg;
        src_seg.begin = src_beg;
        src_seg.size  = src.step * src.rows;

        NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
        ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

        CV_Assert(objects.rows == 1);

        NCVMemPtr objects_beg;
        objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
        objects_beg.memtype = NCVMemoryTypeDevice;

        NCVMemSegment objects_seg;
        objects_seg.begin = objects_beg;
        objects_seg.size = objects.step * objects.rows;
        NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
        ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

        NcvSize32u roi;
        roi.width = d_src.width();
        roi.height = d_src.height();

        NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);

        Ncv32u flags = 0;
        flags |= findLargestObject_ ? NCVPipeObjDet_FindLargestObject : 0;

        ncvStat = ncvDetectObjectsMultiScale_device(
            d_src, roi, d_rects, numDetections, haar, *h_haarStages,
            *d_haarStages, *d_haarNodes, *d_haarFeatures,
            winMinSize,
            minNeighbors_,
            scaleFactor_, 1,
            flags,
            *gpuAllocator, *cpuAllocator, devProp, 0);
        ncvAssertReturnNcvStat(ncvStat);
        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

        return NCV_SUCCESS;
    }
}

#endif

//
// LbpCascade
//

namespace cv { namespace cuda { namespace device
{
    namespace lbp
    {
        void classifyPyramid(int frameW,
                             int frameH,
                             int windowW,
                             int windowH,
                             float initalScale,
                             float factor,
                             int total,
                             const PtrStepSzb& mstages,
                             const int nstages,
                             const PtrStepSzi& mnodes,
                             const PtrStepSzf& mleaves,
                             const PtrStepSzi& msubsets,
                             const PtrStepSzb& mfeatures,
                             const int subsetSize,
                             PtrStepSz<int4> objects,
                             unsigned int* classified,
                             PtrStepSzi integral);

        void connectedConmonents(PtrStepSz<int4> candidates,
                                 int ncandidates,
                                 PtrStepSz<int4> objects,
                                 int groupThreshold,
                                 float grouping_eps,
                                 unsigned int* nclasses);
    }
}}}
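// The two kernels declared above are implemented in this module's CUDA sources:
// classifyPyramid evaluates the LBP cascade over a packed pyramid of integral images,
// and connectedConmonents groups the raw candidate windows into the final detections.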

namespace
{
    cv::Size operator -(const cv::Size& a, const cv::Size& b)
    {
        return cv::Size(a.width - b.width, a.height - b.height);
    }

    cv::Size operator +(const cv::Size& a, const int& i)
    {
        return cv::Size(a.width + i, a.height + i);
    }

    cv::Size operator *(const cv::Size& a, const float& f)
    {
        return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
    }

    cv::Size operator /(const cv::Size& a, const float& f)
    {
        return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
    }

    bool operator <=(const cv::Size& a, const cv::Size& b)
    {
        return a.width <= b.width && a.height <= b.height;
    }

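    // One pyramid level: scale = _scale^order, sFrame is the downscaled frame, sWindow the
    // upscaled detection window, and workArea the number of window positions at that level.
    // The constructor keeps increasing the order until the scaled window is at least
    // minObjectSize, so levels that could only find too-small objects are skipped.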
    struct PyrLavel
    {
        PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
        {
            do
            {
                order = _order;
                scale = pow(_scale, order);
                sFrame = frame / scale;
                workArea = sFrame - window + 1;
                sWindow = window * scale;
                _order++;
            } while (sWindow <= minObjectSize);
        }

        bool isFeasible(cv::Size maxObj)
        {
            return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
        }

        PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
        {
            return PyrLavel(order + 1, factor, frame, window, minObjectSize);
        }

        int order;
        float scale;
        cv::Size sFrame;
        cv::Size workArea;
        cv::Size sWindow;
    };

    class LbpCascade_Impl : public CascadeClassifierBase
    {
    public:
        explicit LbpCascade_Impl(const FileStorage& file);

        virtual Size getClassifierSize() const { return NxM; }

        virtual void detectMultiScale(InputArray image,
                                      OutputArray objects,
                                      Stream& stream);

        virtual void convert(OutputArray gpu_objects,
                             std::vector<Rect>& objects);

    private:
        bool load(const FileNode &root);
        void allocateBuffers(cv::Size frame);

    private:
        struct Stage
        {
            int first;
            int ntrees;
            float threshold;
        };

        enum stage { BOOST = 0 };
        enum feature { LBP = 1, HAAR = 2 };

        static const stage stageType = BOOST;
        static const feature featureType = LBP;

        cv::Size NxM;
        bool isStumps;
        int ncategories;
        int subsetSize;
        int nodeStep;

        // gpu representation of classifier
        GpuMat stage_mat;
        GpuMat trees_mat;
        GpuMat nodes_mat;
        GpuMat leaves_mat;
        GpuMat subsets_mat;
        GpuMat features_mat;

        GpuMat integral;
        GpuMat integralBuffer;
        GpuMat resuzeBuffer;

        GpuMat candidates;
        static const int integralFactor = 4;
    };

    LbpCascade_Impl::LbpCascade_Impl(const FileStorage& file)
    {
        load(file.getFirstTopLevelNode());
    }

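    // detectMultiScale: walks the scale pyramid, packing the integral images of several
    // consecutive levels side by side into the wide `integral` buffer (roughly
    // integralFactor frame widths per batch) before each classifyPyramid launch, then,
    // when minNeighbors_ > 0, groups the accumulated candidates with connectedConmonents
    // and copies the surviving rectangles to _objects.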
    void LbpCascade_Impl::detectMultiScale(InputArray _image,
                                           OutputArray _objects,
                                           Stream& stream)
    {
        const GpuMat image = _image.getGpuMat();

        CV_Assert( image.depth() == CV_8U);
        CV_Assert( scaleFactor_ > 1 );
        CV_Assert( !stream );

        const float grouping_eps = 0.2f;

        BufferPool pool(stream);
        GpuMat objects = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);

        // used for debug
        // candidates.setTo(cv::Scalar::all(0));
        // objects.setTo(cv::Scalar::all(0));

        if (maxObjectSize_ == cv::Size())
            maxObjectSize_ = image.size();

        allocateBuffers(image.size());

        unsigned int classified = 0;
        GpuMat dclassified(1, 1, CV_32S);
        cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );

        PyrLavel level(0, scaleFactor_, image.size(), NxM, minObjectSize_);

        while (level.isFeasible(maxObjectSize_))
        {
            int acc = level.sFrame.width + 1;
            float iniScale = level.scale;

            cv::Size area = level.workArea;
            int step = 1 + (level.scale <= 2.f);

            int total = 0, prev = 0;

            while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize_))
            {
                // create suitable matrix headers
                GpuMat src  = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
                GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));

                // generate integral for scale
                cuda::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
                cuda::integral(src, sint);

                // calculate job
                int totalWidth = level.workArea.width / step;
                total += totalWidth * (level.workArea.height / step);

                // go to the next pyramid level
                level = level.next(scaleFactor_, image.size(), NxM, minObjectSize_);
                area = level.workArea;

                step = (1 + (level.scale <= 2.f));
                prev = acc;
                acc += level.sFrame.width + 1;
            }

            device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor_, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
                leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
        }

        if (minNeighbors_ <= 0 || objects.empty())
            return;

        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
        device::lbp::connectedConmonents(candidates, classified, objects, minNeighbors_, grouping_eps, dclassified.ptr<unsigned int>());

        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
        cudaSafeCall( cudaDeviceSynchronize() );

        if (classified > 0)
        {
            objects.colRange(0, classified).copyTo(_objects);
        }
        else
        {
            _objects.release();
        }
    }

    void LbpCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
    {
        if (_gpu_objects.empty())
        {
            objects.clear();
            return;
        }

        Mat gpu_objects;
        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_objects.getGpuMat().download(gpu_objects);
        }
        else
        {
            gpu_objects = _gpu_objects.getMat();
        }

        CV_Assert( gpu_objects.rows == 1 );
        CV_Assert( gpu_objects.type() == DataType<Rect>::type );

        Rect* ptr = gpu_objects.ptr<Rect>();
        objects.assign(ptr, ptr + gpu_objects.cols);
    }

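    // load: parses a cascade in the standard OpenCV XML/YAML layout (stageType "BOOST",
    // featureType "LBP", stump-only trees) and flattens stages, node values, leaves, LBP
    // subsets and feature rectangles into flat arrays that are uploaded to the GpuMat
    // members above.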
    bool LbpCascade_Impl::load(const FileNode &root)
    {
        const char *CUDA_CC_STAGE_TYPE       = "stageType";
        const char *CUDA_CC_FEATURE_TYPE     = "featureType";
        const char *CUDA_CC_BOOST            = "BOOST";
        const char *CUDA_CC_LBP              = "LBP";
        const char *CUDA_CC_MAX_CAT_COUNT    = "maxCatCount";
        const char *CUDA_CC_HEIGHT           = "height";
        const char *CUDA_CC_WIDTH            = "width";
        const char *CUDA_CC_STAGE_PARAMS     = "stageParams";
        const char *CUDA_CC_MAX_DEPTH        = "maxDepth";
        const char *CUDA_CC_FEATURE_PARAMS   = "featureParams";
        const char *CUDA_CC_STAGES           = "stages";
        const char *CUDA_CC_STAGE_THRESHOLD  = "stageThreshold";
        const float CUDA_THRESHOLD_EPS       = 1e-5f;
        const char *CUDA_CC_WEAK_CLASSIFIERS = "weakClassifiers";
        const char *CUDA_CC_INTERNAL_NODES   = "internalNodes";
        const char *CUDA_CC_LEAF_VALUES      = "leafValues";
        const char *CUDA_CC_FEATURES         = "features";
        const char *CUDA_CC_RECT             = "rect";

        String stageTypeStr = (String)root[CUDA_CC_STAGE_TYPE];
        CV_Assert(stageTypeStr == CUDA_CC_BOOST);

        String featureTypeStr = (String)root[CUDA_CC_FEATURE_TYPE];
        CV_Assert(featureTypeStr == CUDA_CC_LBP);

        NxM.width  = (int)root[CUDA_CC_WIDTH];
        NxM.height = (int)root[CUDA_CC_HEIGHT];
        CV_Assert( NxM.height > 0 && NxM.width > 0 );

        isStumps = ((int)(root[CUDA_CC_STAGE_PARAMS][CUDA_CC_MAX_DEPTH]) == 1) ? true : false;
        CV_Assert(isStumps);

        FileNode fn = root[CUDA_CC_FEATURE_PARAMS];
        if (fn.empty())
            return false;

        ncategories = fn[CUDA_CC_MAX_CAT_COUNT];

        subsetSize = (ncategories + 31) / 32;
        nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 );

        fn = root[CUDA_CC_STAGES];
        if (fn.empty())
            return false;

        std::vector<Stage> stages;
        stages.reserve(fn.size());

        std::vector<int> cl_trees;
        std::vector<int> cl_nodes;
        std::vector<float> cl_leaves;
        std::vector<int> subsets;

        FileNodeIterator it = fn.begin(), it_end = fn.end();
        for (size_t si = 0; it != it_end; si++, ++it )
        {
            FileNode fns = *it;
            Stage st;
            st.threshold = (float)fns[CUDA_CC_STAGE_THRESHOLD] - CUDA_THRESHOLD_EPS;

            fns = fns[CUDA_CC_WEAK_CLASSIFIERS];
            if (fns.empty())
                return false;

            st.ntrees = (int)fns.size();
            st.first = (int)cl_trees.size();

            stages.push_back(st);// (int, int, float)

            cl_trees.reserve(stages[si].first + stages[si].ntrees);

            // weak trees
            FileNodeIterator it1 = fns.begin(), it1_end = fns.end();
            for ( ; it1 != it1_end; ++it1 )
            {
                FileNode fnw = *it1;

                FileNode internalNodes = fnw[CUDA_CC_INTERNAL_NODES];
                FileNode leafValues = fnw[CUDA_CC_LEAF_VALUES];
                if ( internalNodes.empty() || leafValues.empty() )
                    return false;

                int nodeCount = (int)internalNodes.size()/nodeStep;
                cl_trees.push_back(nodeCount);

                cl_nodes.reserve((cl_nodes.size() + nodeCount) * 3);
                cl_leaves.reserve(cl_leaves.size() + leafValues.size());

                if( subsetSize > 0 )
                    subsets.reserve(subsets.size() + nodeCount * subsetSize);

                // nodes
                FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end();

                for( ; iIt != iEnd; )
                {
                    cl_nodes.push_back((int)*(iIt++));
                    cl_nodes.push_back((int)*(iIt++));
                    cl_nodes.push_back((int)*(iIt++));

                    if( subsetSize > 0 )
                        for( int j = 0; j < subsetSize; j++, ++iIt )
                            subsets.push_back((int)*iIt);
                }

                // leaves
                iIt = leafValues.begin(), iEnd = leafValues.end();
                for( ; iIt != iEnd; ++iIt )
                    cl_leaves.push_back((float)*iIt);
            }
        }

        fn = root[CUDA_CC_FEATURES];
        if( fn.empty() )
            return false;
        std::vector<uchar> features;
        features.reserve(fn.size() * 4);
        FileNodeIterator f_it = fn.begin(), f_end = fn.end();
        for (; f_it != f_end; ++f_it)
        {
            FileNode rect = (*f_it)[CUDA_CC_RECT];
            FileNodeIterator r_it = rect.begin();
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
        }

        // copy data structures on gpu
        stage_mat.upload(cv::Mat(1, (int) (stages.size() * sizeof(Stage)), CV_8UC1, (uchar*)&(stages[0]) ));
        trees_mat.upload(cv::Mat(cl_trees).reshape(1,1));
        nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1));
        leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1));
        subsets_mat.upload(cv::Mat(subsets).reshape(1,1));
        features_mat.upload(cv::Mat(features).reshape(4,1));

        return true;
    }

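    // allocateBuffers: (re)creates the working images when the buffers are empty or the
    // frame grows. `resuzeBuffer` holds the largest scaled frame, `integral` has room for
    // integralFactor integral images packed side by side, and `candidates` stores up to
    // frame.width/2 raw windows.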
    void LbpCascade_Impl::allocateBuffers(cv::Size frame)
    {
        if (frame == cv::Size())
            return;

        if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
        {
            resuzeBuffer.create(frame, CV_8UC1);

            integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);

#ifdef HAVE_OPENCV_CUDALEGACY
            NcvSize32u roiSize;
            roiSize.width = frame.width;
            roiSize.height = frame.height;

            cudaDeviceProp prop;
            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );

            Ncv32u bufSize;
            ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
            integralBuffer.create(1, bufSize, CV_8UC1);
#endif

            candidates.create(1 , frame.width >> 1, CV_32SC4);
        }
    }

}

//
// create
//

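// Factory dispatch: ".nvbin" files and any file that cannot be opened as FileStorage are
// treated as NCV Haar cascades (which requires the cudalegacy module); XML/YAML cascades
// with featureType "LBP" use LbpCascade_Impl, and any other readable cascade falls back
// to the Haar implementation.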
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String& filename)
{
    String fext = filename.substr(filename.find_last_of(".") + 1);
    fext = fext.toLowerCase();

    if (fext == "nvbin")
    {
#ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
        return Ptr<cuda::CascadeClassifier>();
#else
        return makePtr<HaarCascade_Impl>(filename);
#endif
    }

    FileStorage fs(filename, FileStorage::READ);

    if (!fs.isOpened())
    {
#ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
        return Ptr<cuda::CascadeClassifier>();
#else
        return makePtr<HaarCascade_Impl>(filename);
#endif
    }

    const char *CUDA_CC_LBP = "LBP";
    String featureTypeStr = (String)fs.getFirstTopLevelNode()["featureType"];
    if (featureTypeStr == CUDA_CC_LBP)
    {
        return makePtr<LbpCascade_Impl>(fs);
    }
    else
    {
#ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
        return Ptr<cuda::CascadeClassifier>();
#else
        return makePtr<HaarCascade_Impl>(filename);
#endif
    }

    CV_Error(Error::StsUnsupportedFormat, "Unsupported format for CUDA CascadeClassifier");
    return Ptr<cuda::CascadeClassifier>();
}

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage& file)
{
    return makePtr<LbpCascade_Impl>(file);
}
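
// A minimal usage sketch of the factories above. The cascade path and the input image are
// placeholders, and detection runs on the default stream, which is what the
// implementations assert:
//
//     Ptr<cuda::CascadeClassifier> cascade =
//         cuda::CascadeClassifier::create("lbpcascade_frontalface.xml");
//     GpuMat d_gray, d_found;
//     d_gray.upload(grayFrame);                     // 8-bit, single-channel input
//     cascade->detectMultiScale(d_gray, d_found);   // detections as a 1xN row of Rect
//     std::vector<Rect> found;
//     cascade->convert(d_found, found);             // download and unpack the results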

#endif