/external/tensorflow/tensorflow/core/util/ |
D | gpu_launch_config.h | 117 int thread_per_block = -1; member 135 const int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock()); in GetGpuLaunchConfig() local 137 std::min(DivUp(physical_thread_count, thread_per_block), in GetGpuLaunchConfig() 141 config.thread_per_block = thread_per_block; in GetGpuLaunchConfig() 163 int thread_per_block = 0; in GetGpuLaunchConfig() local 167 &block_count, &thread_per_block, func, dynamic_shared_memory_size, in GetGpuLaunchConfig() 172 &block_count, &thread_per_block, func, dynamic_shared_memory_size, in GetGpuLaunchConfig() 178 std::min(block_count, DivUp(work_element_count, thread_per_block)); in GetGpuLaunchConfig() 181 config.thread_per_block = thread_per_block; in GetGpuLaunchConfig() 212 config.thread_per_block = fixed_block_size; in GetGpuLaunchConfigFixedBlockSize() [all …]
|
D | gpu_kernel_helper_test.cu.cc | 185 cfg.thread_per_block, 0, d.stream(), cfg, \ in TEST_F() 189 TF_CHECK_OK(GpuLaunchKernel(Count1D, cfg.block_count, cfg.thread_per_block, \ in TEST_F() 198 cfg.thread_per_block, 0, d.stream(), cfg, \ in TEST_F() 202 TF_CHECK_OK(GpuLaunchKernel(Count1D, cfg.block_count, cfg.thread_per_block, \ in TEST_F() 223 return a.thread_per_block.x == b.thread_per_block.x && in operator ==() 224 a.thread_per_block.y == b.thread_per_block.y && in operator ==() 225 a.thread_per_block.z == b.thread_per_block.z && in operator ==() 229 a.thread_per_block.x == b.thread_per_block.x && in operator ==() 230 a.thread_per_block.y == b.thread_per_block.y && in operator ==() 231 a.thread_per_block.z == b.thread_per_block.z; in operator ==() [all …]
|
/external/tensorflow/tensorflow/core/kernels/ |
D | bias_op_gpu.cu.cc | 90 config.thread_per_block, 0, d.stream(), in compute() 95 config.thread_per_block, 0, d.stream(), in compute() 234 config.block_count, config.thread_per_block, in compute() 241 if (config.thread_per_block < kWarpSize) { in compute() 242 config.thread_per_block = kWarpSize; in compute() 245 config.block_count, config.thread_per_block, in compute() 255 BiasGradNHWC_Naive<T>, config.block_count, config.thread_per_block, 0, in compute() 259 config.thread_per_block, 0, d.stream(), in compute()
|
D | depthtospace_op_gpu.cu.cc | 165 D2S_NHWC<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()() 201 D2S_NCHW_LOOP<T, 2>, config.block_count, config.thread_per_block, in operator ()() 208 D2S_NCHW_LOOP<T, 3>, config.block_count, config.thread_per_block, in operator ()() 215 D2S_NCHW_LOOP<T, 4>, config.block_count, config.thread_per_block, in operator ()() 230 D2S_NCHW<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
|
D | inplace_ops_functor_gpu.cu.cc | 54 DoParallelConcatOpKernel<T>, cfg.block_count, cfg.thread_per_block, 0, in DoParallelConcatUpdate() 124 cfg.block_count, cfg.thread_per_block, 0, in DoInplaceOp() 130 cfg.thread_per_block, 0, d.stream(), in DoInplaceOp() 136 cfg.thread_per_block, 0, d.stream(), in DoInplaceOp() 158 cfg.block_count, cfg.thread_per_block, 0, in DoInplaceOp()
|
D | spacetodepth_op_gpu.cu.cc | 162 S2D_NHWC<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()() 198 S2D_NCHW_LOOP<T, 2>, config.block_count, config.thread_per_block, in operator ()() 205 S2D_NCHW_LOOP<T, 3>, config.block_count, config.thread_per_block, in operator ()() 212 S2D_NCHW_LOOP<T, 4>, config.block_count, config.thread_per_block, in operator ()() 227 S2D_NCHW<T>, config.block_count, config.thread_per_block, 0, d.stream(), in operator ()()
|
D | dilation_ops_gpu.cu.cc | 200 DilationKernel<T>, config.block_count, config.thread_per_block, 0, in operator ()() 234 config.thread_per_block, 0, d.stream(), in operator ()() 242 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()() 276 config.thread_per_block, 0, d.stream(), in operator ()() 284 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
|
D | cwise_op_clip_gpu.cu.cc | 74 UnaryClipCustomKernel<T>, config.block_count, config.thread_per_block, in operator ()() 91 config.thread_per_block, 0, d.stream(), in0_flat.size(), in operator ()() 107 config.thread_per_block, 0, d.stream(), in0_flat.size(), in operator ()()
|
D | segment_reduction_ops_gpu.cu.cc | 148 config.thread_per_block, 0, d.stream(), in operator ()() 175 config.block_count, config.thread_per_block, 0, d.stream(), in operator ()() 195 SetToValue<T>, config.block_count, config.thread_per_block, 0, in operator ()() 213 config.thread_per_block, 0, d.stream(), input_outer_dim_size, in operator ()()
|
D | split_lib_gpu.cu.cc | 203 config.thread_per_block, 0, d.stream(), input, in Run() 219 config.thread_per_block, 0, gpu_device.stream(), in Run() 233 config.thread_per_block, smem_usage, gpu_device.stream(), input_ptr, in Run() 238 config.thread_per_block, 0, gpu_device.stream(), input_ptr, in Run()
|
D | diag_op_gpu.cu.cc | 66 DiagGpuKernel<T>, diag_config.block_count, diag_config.thread_per_block, in operator ()() 104 diag_config.thread_per_block, 0, device.stream(), in operator ()()
|
D | conv_2d_gpu.h | 498 config.block_count, config.thread_per_block, 504 config.block_count, config.thread_per_block, 535 config.block_count, config.thread_per_block, 549 config.block_count, config.thread_per_block, 587 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, 593 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, 1028 config.block_count, config.thread_per_block, 0, 1078 block_count = (total_size + config.thread_per_block - 1) / 1079 config.thread_per_block; 1086 block_count, config.thread_per_block / kUnroll, 0, [all …]
|
D | gather_functor_gpu.cu.h | 101 config.thread_per_block, 0, d.stream(), params.data(), indices.data(), 109 config.thread_per_block, 0, d.stream(), params.data(), indices.data(),
|
D | bucketize_op_gpu.cu.cc | 101 config.block_count, config.thread_per_block, in Compute() 108 config.thread_per_block, 0, d.stream(), input.size(), input.data(), in Compute()
|
D | bincount_op_gpu.cu.cc | 127 config.thread_per_block, 0, d.stream(), arr.data(), in Compute() 214 config.block_count, config.thread_per_block, smem_usage, d.stream(), in Compute() 220 config.thread_per_block, 0, d.stream(), in.data(), weights.data(), in Compute()
|
D | concat_lib_gpu_impl.cu.cc | 149 config.thread_per_block, 0, gpu_device.stream(), input_ptrs, split_size, in ConcatGPUImpl() 163 config.thread_per_block, smem_usage, gpu_device.stream(), input_ptrs, in ConcatGPUImpl() 169 config.thread_per_block, 0, gpu_device.stream(), input_ptrs, in ConcatGPUImpl()
|
D | searchsorted_op_gpu.cu.cc | 77 config.thread_per_block, 0, device.stream(), sorted_inputs.data(), in Compute() 100 config.thread_per_block, 0, device.stream(), sorted_inputs.data(), in Compute()
|
/external/tensorflow/tensorflow/core/kernels/sparse/ |
D | kernels_gpu.cu.cc | 158 config.block_count, config.thread_per_block, 0, in operator ()() 163 config.block_count, config.thread_per_block, 0, in operator ()() 234 config.block_count, config.thread_per_block, 0, in operator ()() 251 config.thread_per_block, shared_memory_size, d.stream(), in operator ()() 321 config.thread_per_block, shared_memory_size, d.stream(), a_values.data(), in CSRSparseMatrixBatchMulVecImpl() 435 config.block_count, config.thread_per_block, 0, in CSRSparseMatrixSoftmaxGPUImpl() 456 config.block_count, config.thread_per_block, in CSRSparseMatrixSoftmaxGPUImpl() 631 config.thread_per_block, 0, d.stream(), rows /*size*/, in CSRSparseMatrixSoftmaxGradGPUImpl() 662 config.thread_per_block, shared_memory_size, d.stream(), size, rows, in CSRSparseMatrixSoftmaxGradGPUImpl()
|
/external/tensorflow/tensorflow/core/kernels/image/ |
D | crop_and_resize_op_gpu.cu.cc | 377 CropAndResizeKernel<T>, config.block_count, config.thread_per_block, in operator ()() 413 SetZero<T>, config.block_count, config.thread_per_block, 0, in operator ()() 429 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()() 463 SetZero<float>, config.block_count, config.thread_per_block, 0, in operator ()() 473 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
|
D | resize_nearest_neighbor_op_gpu.cu.cc | 184 GpuLaunchKernel(kernel, config.block_count, config.thread_per_block, 0, in operator ()() 221 output_config.thread_per_block, 0, d.stream(), in operator ()() 238 kernel, input_config.block_count, input_config.thread_per_block, 0, in operator ()()
|
D | resize_bilinear_op_gpu.cu.cc | 444 GpuLaunchKernel(kernel, config.block_count, config.thread_per_block, 0, in operator ()() 493 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()() 500 SetZero<T>, config.block_count, config.thread_per_block, 0, in operator ()() 508 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()() 515 config.thread_per_block, 0, d.stream(), config.virtual_thread_count, in operator ()()
|
D | adjust_saturation_op_gpu.cu.cc | 36 const int threads_per_block = config.thread_per_block; in operator ()()
|
D | adjust_hue_op_gpu.cu.cc | 35 const int threads_per_block = config.thread_per_block; in operator ()()
|
D | non_max_suppression_op.cu.cc | 273 config.thread_per_block, 0, device.stream(), in NmsGpu() 319 config.thread_per_block, 0, device.stream(), in NmsGpu() 463 config.thread_per_block, 0, device.stream(), in DoNMS() 484 config.thread_per_block, 0, device.stream(), in DoNMS() 526 config.thread_per_block, 0, device.stream(), in DoNMS() 540 IndexMultiSelect<int, int>, config.block_count, config.thread_per_block, in DoNMS()
|
/external/tensorflow/tensorflow/core/kernels/linalg/ |
D | matrix_set_diag_op_gpu.cu.cc | 121 config.thread_per_block, 0, device.stream(), in Compute() 129 config.thread_per_block, 0, device.stream(), in Compute()
|