/external/clang/test/SemaCUDA/ |
D | cuda-builtin-vars.cu | 21 out[i++] = blockDim.x; in kernel() 22 blockDim.x = 0; // expected-error {{no setter defined for property 'x'}} in kernel() 23 out[i++] = blockDim.y; in kernel() 24 blockDim.y = 0; // expected-error {{no setter defined for property 'y'}} in kernel() 25 out[i++] = blockDim.z; in kernel() 26 blockDim.z = 0; // expected-error {{no setter defined for property 'z'}} in kernel()
|
/external/tensorflow/tensorflow/core/kernels/ |
D | concat_lib_gpu_impl.cu.cc | 41 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in concat_fixed_kernel() 43 for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in concat_fixed_kernel() 44 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in concat_fixed_kernel() 50 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) { in concat_fixed_kernel() 69 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in concat_variable_kernel() 77 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; in concat_variable_kernel() 78 IntType blockSize = blockDim.x * blockDim.y; in concat_variable_kernel() 97 for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in concat_variable_kernel() 108 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in concat_variable_kernel() 109 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) in concat_variable_kernel()
|
D | split_lib_gpu.cu.cc | 84 eigen_assert(blockDim.y == 1); in SplitOpKernel() 85 eigen_assert(blockDim.z == 1); in SplitOpKernel() 124 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in split_v_kernel() 132 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; in split_v_kernel() 133 IntType blockSize = blockDim.x * blockDim.y; in split_v_kernel() 152 for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in split_v_kernel() 163 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in split_v_kernel() 164 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) in split_v_kernel() 179 eigen_assert(blockDim.y == 1); in SplitVOpKernel_fixed() 180 eigen_assert(blockDim.z == 1); in SplitVOpKernel_fixed()
|
D | bias_op_gpu.cu.cc | 130 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { in BiasGradNHWC_SharedAtomics() 135 for (int32 index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; in BiasGradNHWC_SharedAtomics() 136 index += blockDim.x * gridDim.x) { in BiasGradNHWC_SharedAtomics() 142 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { in BiasGradNHWC_SharedAtomics() 156 for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) { in BiasGradNCHW_SharedAtomics() 167 for (int32 index = group_index * blockDim.x + threadIdx.x; in BiasGradNCHW_SharedAtomics() 168 index < total_count; index += blockDim.x * group_size) { in BiasGradNCHW_SharedAtomics()
|
D | random_op_gpu.h | 135 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; 136 const int32 total_thread_count = gridDim.x * blockDim.x; 174 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; 175 const int32 total_thread_count = gridDim.x * blockDim.x;
|
D | bucketize_op_gpu.cu.cc | 46 int32 lidx = threadIdx.y * blockDim.x + threadIdx.x; in BucketizeCustomKernel() 47 int32 blockSize = blockDim.x * blockDim.y; in BucketizeCustomKernel()
|
D | check_numerics_op_gpu.cu.cc | 40 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in CheckNumericsKernel() 41 const int32 total_thread_count = gridDim.x * blockDim.x; in CheckNumericsKernel()
|
D | reduction_gpu_kernels.cu.h | 200 const int gid = bid * blockDim.x + tid; 201 const int stride = blockDim.x * gridDim.x; 222 max(min(num_elems - bid * blockDim.x, num_threads), 0); 236 assert(blockDim.x % 32 == 0); 237 int warps_per_block = blockDim.x / 32; 243 int gid = threadIdx.x + blockIdx.x * blockDim.x; 308 rows_per_warp * (blockIdx.y * blockDim.y + threadIdx.y); 325 row += rows_per_warp * gridDim.y * blockDim.y; 326 for (; row < num_rows; row += rows_per_warp * gridDim.y * blockDim.y) { 347 if (blockDim.y > 1) { [all …]
|
D | depthwise_conv_op_gpu.h | 195 assert(blockDim.x == kBlockDepth); in DepthwiseConv2dGPUKernelNHWCSmall() 196 assert(blockDim.y == args.in_cols); in DepthwiseConv2dGPUKernelNHWCSmall() 197 const int block_height = blockDim.z; in DepthwiseConv2dGPUKernelNHWCSmall() 480 assert(blockDim.x == args.in_cols); in DepthwiseConv2dGPUKernelNCHWSmall() 481 assert(blockDim.z == kBlockDepth); in DepthwiseConv2dGPUKernelNCHWSmall() 482 const int block_height = blockDim.y; in DepthwiseConv2dGPUKernelNCHWSmall() 1170 assert(CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, blockDim.z)); 1178 const int in_width = blockDim.y; // slower (see b/62280718): args.in_cols; 1187 assert(blockDim.x == kBlockDepth); 1188 assert(blockDim.y == args.in_cols); [all …]
|
D | relu_op_gpu.cu.cc | 41 int32 index = blockIdx.x * blockDim.x + threadIdx.x; in ReluGradHalfKernel() 42 const int32 total_device_threads = gridDim.x * blockDim.x; in ReluGradHalfKernel()
|
D | parameterized_truncated_normal_op_gpu.cu.cc | 62 const int32 initial_offset = blockIdx.x * blockDim.x + threadIdx.x; in TruncatedNormalKernel() 85 max_samples_per_item * (gridDim.x * blockDim.x); in TruncatedNormalKernel()
|
/external/eigen/unsupported/Eigen/CXX11/src/Tensor/ |
D | TensorReductionCuda.h | 115 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernel() 116 const Index num_threads = blockDim.x * gridDim.x; in ReductionInitKernel() 192 eigen_assert(blockDim.x == 1); in ReductionInitFullReduxKernelHalfFloat() 205 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernelHalfFloat() 206 const Index num_threads = blockDim.x * gridDim.x; in ReductionInitKernelHalfFloat() 378 eigen_assert(blockDim.y == 1); 379 eigen_assert(blockDim.z == 1); 386 const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread); 389 const Index num_threads = blockDim.x * gridDim.x; 390 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; [all …]
|
D | TensorConvolution.h | 584 const int first_plane = blockIdx.y * blockDim.y; 585 const int plane_stride = blockDim.y * gridDim.y; 592 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { 603 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { 638 const int first_plane = blockIdx.z * blockDim.z; 639 const int plane_stride = blockDim.z * gridDim.z; 648 for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { 651 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { 663 for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { 665 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { [all …]
|
D | TensorRandom.h | 24 blockIdx.x * blockDim.x + threadIdx.x + in get_random_seed() 25 gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); in get_random_seed()
|
/external/tensorflow/tensorflow/core/util/ |
D | cuda_device_functions.h | 90 return detail::CudaGridRange<T>(blockIdx.x * blockDim.x + threadIdx.x, in CudaGridRangeX() 91 gridDim.x * blockDim.x, count); in CudaGridRangeX() 98 return detail::CudaGridRange<T>(blockIdx.y * blockDim.y + threadIdx.y, in CudaGridRangeY() 99 gridDim.y * blockDim.y, count); in CudaGridRangeY() 106 return detail::CudaGridRange<T>(blockIdx.z * blockDim.z + threadIdx.z, in CudaGridRangeZ() 107 gridDim.z * blockDim.z, count); in CudaGridRangeZ() 386 assert(blockDim.y == 1 && blockDim.z == 1); in SetZero() 387 assert(blockDim.x * gridDim.x / blockDim.x == gridDim.x); in SetZero() 397 assert(blockDim.y == 1 && blockDim.z == 1); in SetToValue() 398 assert(blockDim.x * gridDim.x / blockDim.x == gridDim.x); in SetToValue()
|
/external/clang/test/CodeGenCUDA/ |
D | cuda-builtin-vars.cu | 17 out[i++] = blockDim.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() in kernel() 18 out[i++] = blockDim.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() in kernel() 19 out[i++] = blockDim.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z() in kernel()
|
/external/tensorflow/tensorflow/examples/adding_an_op/ |
D | cuda_op_kernel.cu.cc | 22 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in AddOneKernel() 23 i += blockDim.x * gridDim.x) { in AddOneKernel()
|
/external/tensorflow/tensorflow/tools/ci_build/builds/user_ops/ |
D | cuda_op_kernel.cu.cc | 22 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in AddOneKernel() 23 i += blockDim.x * gridDim.x) { in AddOneKernel()
|
/external/tensorflow/tensorflow/contrib/mpi_collectives/kernels/ |
D | ring.cu.cc | 90 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in elemwise_accum() 91 i += blockDim.x * gridDim.x) { in elemwise_accum()
|
/external/tensorflow/tensorflow/contrib/mpi_collectives/ |
D | ring.cu.cc | 90 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in elemwise_accum() 91 i += blockDim.x * gridDim.x) { in elemwise_accum()
|
/external/eigen/test/ |
D | cuda_common.h | 11 dim3 threadIdx, blockDim, blockIdx; variable 26 int i = threadIdx.x + blockIdx.x*blockDim.x; in run_on_cuda_meta_kernel()
|
/external/tensorflow/tensorflow/contrib/rnn/kernels/ |
D | lstm_ops_gpu.cu.cc | 91 const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; in lstm_gates() 92 const int act_id = blockIdx.y * blockDim.y + threadIdx.y; in lstm_gates() 206 const int gid = blockDim.x * blockIdx.x + threadIdx.x; in concat_xh() 304 const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; in lstm_gates_bprop() 305 const int act_id = blockIdx.y * blockDim.y + threadIdx.y; in lstm_gates_bprop()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/tests_data/ |
D | saxpy.ll | 8 @blockDim = external addrspace(1) global %struct.dim3 36 …(%struct.dim3, %struct.dim3* addrspacecast (%struct.dim3 addrspace(1)* @blockDim to %struct.dim3*)… 88 …(%struct.dim3, %struct.dim3* addrspacecast (%struct.dim3 addrspace(1)* @blockDim to %struct.dim3*)…
|
/external/tensorflow/tensorflow/contrib/tensorrt/custom_plugin_examples/ |
D | inc_op_kernel.cu.cc | 33 int i = blockDim.x * blockIdx.x + threadIdx.x; in VecInc()
|
/external/clang/lib/Headers/ |
D | cuda_builtin_vars.h | 114 __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; variable
|