Home
last modified time | relevance | path

Searched refs:thread_per_block (Results 1 – 25 of 60) sorted by relevance

123

/external/tensorflow/tensorflow/core/util/
Dgpu_launch_config.h117 int thread_per_block = -1; member
135 const int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock()); in GetGpuLaunchConfig() local
137 std::min(DivUp(physical_thread_count, thread_per_block), in GetGpuLaunchConfig()
141 config.thread_per_block = thread_per_block; in GetGpuLaunchConfig()
163 int thread_per_block = 0; in GetGpuLaunchConfig() local
167 &block_count, &thread_per_block, func, dynamic_shared_memory_size, in GetGpuLaunchConfig()
172 &block_count, &thread_per_block, func, dynamic_shared_memory_size, in GetGpuLaunchConfig()
178 std::min(block_count, DivUp(work_element_count, thread_per_block)); in GetGpuLaunchConfig()
181 config.thread_per_block = thread_per_block; in GetGpuLaunchConfig()
212 config.thread_per_block = fixed_block_size; in GetGpuLaunchConfigFixedBlockSize()
[all …]
Dgpu_kernel_helper_test.cu.cc185 cfg.thread_per_block, 0, d.stream(), cfg, \ in TEST_F()
189 TF_CHECK_OK(GpuLaunchKernel(Count1D, cfg.block_count, cfg.thread_per_block, \ in TEST_F()
198 cfg.thread_per_block, 0, d.stream(), cfg, \ in TEST_F()
202 TF_CHECK_OK(GpuLaunchKernel(Count1D, cfg.block_count, cfg.thread_per_block, \ in TEST_F()
223 return a.thread_per_block.x == b.thread_per_block.x && in operator ==()
224 a.thread_per_block.y == b.thread_per_block.y && in operator ==()
225 a.thread_per_block.z == b.thread_per_block.z && in operator ==()
229 a.thread_per_block.x == b.thread_per_block.x && in operator ==()
230 a.thread_per_block.y == b.thread_per_block.y && in operator ==()
231 a.thread_per_block.z == b.thread_per_block.z; in operator ==()
[all …]
/external/tensorflow/tensorflow/core/kernels/
Dbias_op_gpu.cu.cc90 config.thread_per_block, 0, d.stream(), in compute()
95 config.thread_per_block, 0, d.stream(), in compute()
234 config.block_count, config.thread_per_block, in compute()
241 if (config.thread_per_block < kWarpSize) { in compute()
242 config.thread_per_block = kWarpSize; in compute()
245 config.block_count, config.thread_per_block, in compute()
255 BiasGradNHWC_Naive<T>, config.block_count, config.thread_per_block, 0, in compute()
259 config.thread_per_block, 0, d.stream(), in compute()
Ddepthtospace_op_gpu.cu.cc165 D2S_NHWC<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
201 D2S_NCHW_LOOP<T, 2>, config.block_count, config.thread_per_block, in operator ()()
208 D2S_NCHW_LOOP<T, 3>, config.block_count, config.thread_per_block, in operator ()()
215 D2S_NCHW_LOOP<T, 4>, config.block_count, config.thread_per_block, in operator ()()
230 D2S_NCHW<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
Dinplace_ops_functor_gpu.cu.cc54 DoParallelConcatOpKernel<T>, cfg.block_count, cfg.thread_per_block, 0, in DoParallelConcatUpdate()
124 cfg.block_count, cfg.thread_per_block, 0, in DoInplaceOp()
130 cfg.thread_per_block, 0, d.stream(), in DoInplaceOp()
136 cfg.thread_per_block, 0, d.stream(), in DoInplaceOp()
158 cfg.block_count, cfg.thread_per_block, 0, in DoInplaceOp()
Dspacetodepth_op_gpu.cu.cc162 S2D_NHWC<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
198 S2D_NCHW_LOOP<T, 2>, config.block_count, config.thread_per_block, in operator ()()
205 S2D_NCHW_LOOP<T, 3>, config.block_count, config.thread_per_block, in operator ()()
212 S2D_NCHW_LOOP<T, 4>, config.block_count, config.thread_per_block, in operator ()()
227 S2D_NCHW<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
Ddilation_ops_gpu.cu.cc200 DilationKernel<T>, config.block_count, config.thread_per_block, 0, in operator ()()
234 config.thread_per_block, 0, d.stream(), in operator ()()
242 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
276 config.thread_per_block, 0, d.stream(), in operator ()()
284 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
Dcwise_op_clip_gpu.cu.cc74 UnaryClipCustomKernel<T>, config.block_count, config.thread_per_block, in operator ()()
91 config.thread_per_block, 0, d.stream(), in0_flat.size(), in operator ()()
107 config.thread_per_block, 0, d.stream(), in0_flat.size(), in operator ()()
Dsegment_reduction_ops_gpu.cu.cc148 config.thread_per_block, 0, d.stream(), in operator ()()
175 config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
195 SetToValue<T>, config.block_count, config.thread_per_block, 0, in operator ()()
213 config.thread_per_block, 0, d.stream(), input_outer_dim_size, in operator ()()
Dsplit_lib_gpu.cu.cc203 config.thread_per_block, 0, d.stream(), input, in Run()
219 config.thread_per_block, 0, gpu_device.stream(), in Run()
233 config.thread_per_block, smem_usage, gpu_device.stream(), input_ptr, in Run()
238 config.thread_per_block, 0, gpu_device.stream(), input_ptr, in Run()
Ddiag_op_gpu.cu.cc66 DiagGpuKernel<T>, diag_config.block_count, diag_config.thread_per_block, in operator ()()
104 diag_config.thread_per_block, 0, device.stream(), in operator ()()
Dconv_2d_gpu.h498 config.block_count, config.thread_per_block,
504 config.block_count, config.thread_per_block,
535 config.block_count, config.thread_per_block,
549 config.block_count, config.thread_per_block,
587 config.thread_per_block, 0, d.stream(), config.virtual_thread_count,
593 config.thread_per_block, 0, d.stream(), config.virtual_thread_count,
1028 config.block_count, config.thread_per_block, 0,
1078 block_count = (total_size + config.thread_per_block - 1) /
1079 config.thread_per_block;
1086 block_count, config.thread_per_block / kUnroll, 0,
[all …]
Dgather_functor_gpu.cu.h101 config.thread_per_block, 0, d.stream(), params.data(), indices.data(),
109 config.thread_per_block, 0, d.stream(), params.data(), indices.data(),
Dbucketize_op_gpu.cu.cc101 config.block_count, config.thread_per_block, in Compute()
108 config.thread_per_block, 0, d.stream(), input.size(), input.data(), in Compute()
Dbincount_op_gpu.cu.cc127 config.thread_per_block, 0, d.stream(), arr.data(), in Compute()
214 config.block_count, config.thread_per_block, smem_usage, d.stream(), in Compute()
220 config.thread_per_block, 0, d.stream(), in.data(), weights.data(), in Compute()
Dconcat_lib_gpu_impl.cu.cc149 config.thread_per_block, 0, gpu_device.stream(), input_ptrs, split_size, in ConcatGPUImpl()
163 config.thread_per_block, smem_usage, gpu_device.stream(), input_ptrs, in ConcatGPUImpl()
169 config.thread_per_block, 0, gpu_device.stream(), input_ptrs, in ConcatGPUImpl()
Dsearchsorted_op_gpu.cu.cc77 config.thread_per_block, 0, device.stream(), sorted_inputs.data(), in Compute()
100 config.thread_per_block, 0, device.stream(), sorted_inputs.data(), in Compute()
/external/tensorflow/tensorflow/core/kernels/sparse/
Dkernels_gpu.cu.cc158 config.block_count, config.thread_per_block, 0, in operator ()()
163 config.block_count, config.thread_per_block, 0, in operator ()()
234 config.block_count, config.thread_per_block, 0, in operator ()()
251 config.thread_per_block, shared_memory_size, d.stream(), in operator ()()
321 config.thread_per_block, shared_memory_size, d.stream(), a_values.data(), in CSRSparseMatrixBatchMulVecImpl()
435 config.block_count, config.thread_per_block, 0, in CSRSparseMatrixSoftmaxGPUImpl()
456 config.block_count, config.thread_per_block, in CSRSparseMatrixSoftmaxGPUImpl()
631 config.thread_per_block, 0, d.stream(), rows /*size*/, in CSRSparseMatrixSoftmaxGradGPUImpl()
662 config.thread_per_block, shared_memory_size, d.stream(), size, rows, in CSRSparseMatrixSoftmaxGradGPUImpl()
/external/tensorflow/tensorflow/core/kernels/image/
Dcrop_and_resize_op_gpu.cu.cc377 CropAndResizeKernel<T>, config.block_count, config.thread_per_block, in operator ()()
413 SetZero<T>, config.block_count, config.thread_per_block, 0, in operator ()()
429 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
463 SetZero<float>, config.block_count, config.thread_per_block, 0, in operator ()()
473 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
Dresize_nearest_neighbor_op_gpu.cu.cc184 GpuLaunchKernel(kernel, config.block_count, config.thread_per_block, 0, in operator ()()
221 output_config.thread_per_block, 0, d.stream(), in operator ()()
238 kernel, input_config.block_count, input_config.thread_per_block, 0, in operator ()()
Dresize_bilinear_op_gpu.cu.cc444 GpuLaunchKernel(kernel, config.block_count, config.thread_per_block, 0, in operator ()()
493 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
500 SetZero<T>, config.block_count, config.thread_per_block, 0, in operator ()()
508 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
515 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
Dadjust_saturation_op_gpu.cu.cc36 const int threads_per_block = config.thread_per_block; in operator ()()
Dadjust_hue_op_gpu.cu.cc35 const int threads_per_block = config.thread_per_block; in operator ()()
Dnon_max_suppression_op.cu.cc273 config.thread_per_block, 0, device.stream(), in NmsGpu()
319 config.thread_per_block, 0, device.stream(), in NmsGpu()
463 config.thread_per_block, 0, device.stream(), in DoNMS()
484 config.thread_per_block, 0, device.stream(), in DoNMS()
526 config.thread_per_block, 0, device.stream(), in DoNMS()
540 IndexMultiSelect<int, int>, config.block_count, config.thread_per_block, in DoNMS()
/external/tensorflow/tensorflow/core/kernels/linalg/
Dmatrix_set_diag_op_gpu.cu.cc121 config.thread_per_block, 0, device.stream(), in Compute()
129 config.thread_per_block, 0, device.stream(), in Compute()

123