hog.cu - OpenGrok cross reference for /external/opencv3/modules/cudaobjdetect/src/cuda/hog.cu

Lines Matching refs:nthreads
261         template <int nthreads, // Number of threads which process one block histogram
274             __shared__ float sh_squares[nthreads * nblocks];  in normalize_hists_kernel_many_blocks()
275             float* squares = sh_squares + threadIdx.z * nthreads;  in normalize_hists_kernel_many_blocks()
281             float sum = reduce_smem<nthreads>(squares, elem * elem);  in normalize_hists_kernel_many_blocks()
286             sum = reduce_smem<nthreads>(squares, elem * elem);  in normalize_hists_kernel_many_blocks()
301             int nthreads = power_2up(block_hist_size);  in normalize_hists()  local
302             dim3 threads(nthreads, 1, nblocks);  in normalize_hists()
308             if (nthreads == 32)  in normalize_hists()
310             else if (nthreads == 64)  in normalize_hists()
312             else if (nthreads == 128)  in normalize_hists()
314             else if (nthreads == 256)  in normalize_hists()
316             else if (nthreads == 512)  in normalize_hists()
332        template <int nthreads, // Number of threads per one histogram block
348            for (int i = threadIdx.x; i < cdescr_size; i += nthreads)  in compute_confidence_hists_kernel_many_blocks()
355            __shared__ float products[nthreads * nblocks];  in compute_confidence_hists_kernel_many_blocks()
357            const int tid = threadIdx.z * nthreads + threadIdx.x;  in compute_confidence_hists_kernel_many_blocks()
359            reduce<nthreads>(products, product, tid, plus<float>());  in compute_confidence_hists_kernel_many_blocks()
370            const int nthreads = 256;  in compute_confidence_hists()  local
378            dim3 threads(nthreads, 1, nblocks);  in compute_confidence_hists()
381 …cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,  in compute_confidence_hists()
386            compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(  in compute_confidence_hists()
394         template <int nthreads, // Number of threads per one histogram block
410             for (int i = threadIdx.x; i < cdescr_size; i += nthreads)  in classify_hists_kernel_many_blocks()
417             __shared__ float products[nthreads * nblocks];  in classify_hists_kernel_many_blocks()
419             const int tid = threadIdx.z * nthreads + threadIdx.x;  in classify_hists_kernel_many_blocks()
421             reduce<nthreads>(products, product, tid, plus<float>());  in classify_hists_kernel_many_blocks()
432             const int nthreads = 256;  in classify_hists()  local
440             dim3 threads(nthreads, 1, nblocks);  in classify_hists()
443 …cudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>, cudaFunc…  in classify_hists()
446             classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(  in classify_hists()
458         template <int nthreads>
470             for (int i = threadIdx.x; i < cdescr_size; i += nthreads)  in extract_descrs_by_rows_kernel()
482             const int nthreads = 256;  in extract_descrs_by_rows()  local
488             dim3 threads(nthreads, 1);  in extract_descrs_by_rows()
492             extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(  in extract_descrs_by_rows()
500         template <int nthreads>
513             for (int i = threadIdx.x; i < cdescr_size; i += nthreads)  in extract_descrs_by_cols_kernel()
531             const int nthreads = 256;  in extract_descrs_by_cols()  local
537             dim3 threads(nthreads, 1);  in extract_descrs_by_cols()
541             extract_descrs_by_cols_kernel<nthreads><<<grid, threads>>>(  in extract_descrs_by_cols()
552         template <int nthreads, int correct_gamma>
560             __shared__ float sh_row[(nthreads + 2) * 3];  in compute_gradients_8UC4_kernel()
569             sh_row[threadIdx.x + 1 + (nthreads + 2)] = val.y;  in compute_gradients_8UC4_kernel()
570             sh_row[threadIdx.x + 1 + 2 * (nthreads + 2)] = val.z;  in compute_gradients_8UC4_kernel()
576                 sh_row[(nthreads + 2)] = val.y;  in compute_gradients_8UC4_kernel()
577                 sh_row[2 * (nthreads + 2)] = val.z;  in compute_gradients_8UC4_kernel()
584                 sh_row[blockDim.x + 1 + (nthreads + 2)] = val.y;  in compute_gradients_8UC4_kernel()
585                 sh_row[blockDim.x + 1 + 2 * (nthreads + 2)] = val.z;  in compute_gradients_8UC4_kernel()
594                 b.y = sh_row[threadIdx.x + 2 + (nthreads + 2)];  in compute_gradients_8UC4_kernel()
595                 b.z = sh_row[threadIdx.x + 2 + 2 * (nthreads + 2)];  in compute_gradients_8UC4_kernel()
597                 a.y = sh_row[threadIdx.x + (nthreads + 2)];  in compute_gradients_8UC4_kernel()
598                 a.z = sh_row[threadIdx.x + 2 * (nthreads + 2)];  in compute_gradients_8UC4_kernel()
659             const int nthreads = 256;  in compute_gradients_8UC4()  local
661             dim3 bdim(nthreads, 1);  in compute_gradients_8UC4()
665 …compute_gradients_8UC4_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, …  in compute_gradients_8UC4()
667 …compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, …  in compute_gradients_8UC4()
674         template <int nthreads, int correct_gamma>
682             __shared__ float sh_row[nthreads + 2];  in compute_gradients_8UC1_kernel()
732             const int nthreads = 256;  in compute_gradients_8UC1()  local
734             dim3 bdim(nthreads, 1);  in compute_gradients_8UC1()
738 …compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, …  in compute_gradients_8UC1()
740 …compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, …  in compute_gradients_8UC1()