Lines Matching refs:nblocks
117 template <int nblocks> // Number of histogram blocks processed by single GPU thread block
131 float* final_hist = smem + cnbins * 48 * nblocks; in compute_hists_kernel_many_blocks()
147 hist[bin_id * 48 * nblocks] = 0.f; in compute_hists_kernel_many_blocks()
168 hist[bin.x * 48 * nblocks] += gaussian * interp_weight * vote.x; in compute_hists_kernel_many_blocks()
169 hist[bin.y * 48 * nblocks] += gaussian * interp_weight * vote.y; in compute_hists_kernel_many_blocks()
173 for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48 * nblocks) in compute_hists_kernel_many_blocks()
199 const int nblocks = 1; in compute_hists() local
206 dim3 grid(divUp(img_block_width, nblocks), img_block_height); in compute_hists()
207 dim3 threads(32, 2, nblocks); in compute_hists()
209 cudaSafeCall(cudaFuncSetCacheConfig(compute_hists_kernel_many_blocks<nblocks>, in compute_hists()
215 … int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12 * nblocks) * sizeof(float); in compute_hists()
216 … int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * nblocks) * sizeof(float); in compute_hists()
218 compute_hists_kernel_many_blocks<nblocks><<<grid, threads, smem>>>( in compute_hists()
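In compute_hists() above, nblocks is fixed at 1 and drives both the launch shape (a grid of divUp(img_block_width, nblocks) x img_block_height thread blocks of 32 x 2 x nblocks threads) and the dynamic shared-memory size. The following is a minimal host-side sketch of that arithmetic only, not the library code: divUp is assumed to be ceiling division, and nbins, CELLS_PER_BLOCK_X/Y and the image geometry are illustrative values, not taken from this listing.

    // Sketch: reproduces the launch/shared-memory arithmetic seen in compute_hists().
    #include <cstdio>

    static int divUp(int a, int b) { return (a + b - 1) / b; }   // assumed definition

    int main()
    {
        const int nblocks = 1;                                   // histogram blocks per CUDA thread block
        const int nbins = 9;                                     // assumed orientation bin count
        const int CELLS_PER_BLOCK_X = 2, CELLS_PER_BLOCK_Y = 2;  // assumed cell layout
        const int img_block_width = 7, img_block_height = 15;    // hypothetical block grid

        // Grid covers the block grid, nblocks histogram blocks per thread block along x.
        const int grid_x = divUp(img_block_width, nblocks);
        const int grid_y = img_block_height;

        // Dynamic shared memory: 12 partial histograms per cell histogram, followed by the
        // reduced "final" histograms (cf. smem + cnbins * 48 * nblocks, where 48 = 12 * 2 * 2).
        const size_t hists_size       = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12 * nblocks * sizeof(float);
        const size_t final_hists_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * nblocks * sizeof(float);

        printf("grid=(%d,%d) threads=(32,2,%d) smem=%zu bytes\n",
               grid_x, grid_y, nblocks, hists_size + final_hists_size);
        return 0;
    }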
262 int nblocks> // Number of block histograms processed by one GPU thread block
274 __shared__ float sh_squares[nthreads * nblocks]; in normalize_hists_kernel_many_blocks()
298 const int nblocks = 1; in normalize_hists() local
302 dim3 threads(nthreads, 1, nblocks); in normalize_hists()
306 dim3 grid(divUp(img_block_width, nblocks), img_block_height); in normalize_hists()
309 …normalize_hists_kernel_many_blocks<32, nblocks><<<grid, threads>>>(block_hist_size, img_block_widt… in normalize_hists()
311 …normalize_hists_kernel_many_blocks<64, nblocks><<<grid, threads>>>(block_hist_size, img_block_widt… in normalize_hists()
313 …normalize_hists_kernel_many_blocks<64, nblocks><<<grid, threads>>>(block_hist_size, img_block_widt… in normalize_hists()
315 …normalize_hists_kernel_many_blocks<256, nblocks><<<grid, threads>>>(block_hist_size, img_block_wid… in normalize_hists()
317 …normalize_hists_kernel_many_blocks<512, nblocks><<<grid, threads>>>(block_hist_size, img_block_wid… in normalize_hists()
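normalize_hists() keeps nblocks at 1 but selects the nthreads template argument at run time (the lines above show instantiations for 32, 64, 256 and 512 threads), and the kernel reserves sh_squares[nthreads * nblocks] in shared memory. Below is a sketch of that dispatch pattern under those assumptions: the kernel body is a placeholder rather than the real normalization, and power_2up/divUp are local helpers for the sketch, not the library's.

    #include <cuda_runtime.h>

    static int divUp(int a, int b) { return (a + b - 1) / b; }
    static int power_2up(int n) { int p = 1; while (p < n) p <<= 1; return p; }

    template <int nthreads, int nblocks>
    __global__ void normalize_sketch(float* block_hists, int block_hist_size)
    {
        // One statically sized scratch stripe per histogram block handled by this CUDA block.
        __shared__ float sh_squares[nthreads * nblocks];
        sh_squares[threadIdx.z * nthreads + threadIdx.x] = 0.f;   // placeholder work
        (void)block_hists; (void)block_hist_size;
    }

    void launch_normalize(float* block_hists, int block_hist_size,
                          int img_block_width, int img_block_height)
    {
        const int nblocks = 1;
        const int nthreads = power_2up(block_hist_size);          // round up to a power of two
        dim3 threads(nthreads, 1, nblocks);
        dim3 grid(divUp(img_block_width, nblocks), img_block_height);

        // Pick the compile-time instantiation matching the run-time thread count.
        if (nthreads == 32)
            normalize_sketch<32, nblocks><<<grid, threads>>>(block_hists, block_hist_size);
        else if (nthreads == 64)
            normalize_sketch<64, nblocks><<<grid, threads>>>(block_hists, block_hist_size);
        else if (nthreads == 256)
            normalize_sketch<256, nblocks><<<grid, threads>>>(block_hists, block_hist_size);
        else
            normalize_sketch<512, nblocks><<<grid, threads>>>(block_hists, block_hist_size);
    }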
333 int nblocks> // Number of histogram blocks processed by single GPU thread block
355 __shared__ float products[nthreads * nblocks]; in compute_confidence_hists_kernel_many_blocks()
371 const int nblocks = 1; in compute_confidence_hists() local
378 dim3 threads(nthreads, 1, nblocks); in compute_confidence_hists()
379 dim3 grid(divUp(img_win_width, nblocks), img_win_height); in compute_confidence_hists()
381 …cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>, in compute_confidence_hists()
386 compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>( in compute_confidence_hists()
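compute_confidence_hists() launches (nthreads, 1, nblocks) threads over a grid of divUp(img_win_width, nblocks) x img_win_height, so each threadIdx.z slice handles one detection window and owns an nthreads-wide stripe of the shared products buffer. The sketch below illustrates that slicing with a generic strided dot product and tree reduction; the parameter names, indexing, and the assumption that nthreads is a power of two are illustrative, not the library kernel.

    #include <cuda_runtime.h>

    template <int nthreads, int nblocks>
    __global__ void confidence_sketch(const float* coefs, const float* block_hists,
                                      int win_hist_size, int win_stride, float* confidences)
    {
        // One nthreads-wide reduction stripe per detection window handled by this CUDA block.
        __shared__ float products[nthreads * nblocks];
        const int win = blockIdx.x * nblocks + threadIdx.z;       // window for this z slice
        float* stripe = products + threadIdx.z * nthreads;

        // Strided partial dot product between the window descriptor and the SVM weights.
        float sum = 0.f;
        const float* hist = block_hists + win * win_stride;
        for (int i = threadIdx.x; i < win_hist_size; i += nthreads)
            sum += hist[i] * coefs[i];
        stripe[threadIdx.x] = sum;
        __syncthreads();

        // Tree reduction within the stripe (nthreads assumed to be a power of two).
        for (int s = nthreads / 2; s > 0; s >>= 1)
        {
            if (threadIdx.x < s) stripe[threadIdx.x] += stripe[threadIdx.x + s];
            __syncthreads();
        }

        // Row-major window index; boundary checks omitted in this sketch.
        if (threadIdx.x == 0)
            confidences[blockIdx.y * gridDim.x * nblocks + win] = stripe[0];
    }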
395 int nblocks> // Number of histogram blocks processed by single GPU thread block
417 __shared__ float products[nthreads * nblocks]; in classify_hists_kernel_many_blocks()
433 const int nblocks = 1; in classify_hists() local
440 dim3 threads(nthreads, 1, nblocks); in classify_hists()
441 dim3 grid(divUp(img_win_width, nblocks), img_win_height); in classify_hists()
443 …cudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>, cudaFunc… in classify_hists()
446 classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>( in classify_hists()
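classify_hists() follows the same launch pattern and, like compute_confidence_hists(), sets a per-kernel cache configuration for the chosen template instantiation before launching it (the enumerator passed at line 443 is truncated above). The sketch below shows that general pattern only: the kernel body, the cudaSafeCall helper, and the PreferL1 choice are placeholders, not the library's.

    #include <cuda_runtime.h>
    #include <cstdio>

    static void cudaSafeCall(cudaError_t e)                       // placeholder error check
    {
        if (e != cudaSuccess) fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(e));
    }

    template <int nthreads, int nblocks>
    __global__ void classify_sketch(float* labels)
    {
        // One flag per detection window; bounds checks omitted in this sketch.
        if (threadIdx.x == 0)
            labels[blockIdx.y * gridDim.x * nblocks + blockIdx.x * nblocks + threadIdx.z] = 1.f;
    }

    void launch_classify(float* labels, int img_win_width, int img_win_height)
    {
        const int nthreads = 256, nblocks = 1;                    // mirrors the const nblocks = 1 above
        dim3 threads(nthreads, 1, nblocks);
        dim3 grid((img_win_width + nblocks - 1) / nblocks, img_win_height);

        // Ask the runtime to favor a cache configuration for this particular instantiation
        // before launching it; PreferL1 is one possible choice, used here for illustration.
        cudaSafeCall(cudaFuncSetCacheConfig(classify_sketch<nthreads, nblocks>,
                                            cudaFuncCachePreferL1));
        classify_sketch<nthreads, nblocks><<<grid, threads>>>(labels);
        cudaSafeCall(cudaGetLastError());
    }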