
Searched refs:gridDim (Results 1 – 25 of 46) sorted by relevance

/external/clang/test/SemaCUDA/
cuda-builtin-vars.cu:28  out[i++] = gridDim.x; in kernel()
29 gridDim.x = 0; // expected-error {{no setter defined for property 'x'}} in kernel()
30 out[i++] = gridDim.y; in kernel()
31 gridDim.y = 0; // expected-error {{no setter defined for property 'y'}} in kernel()
32 out[i++] = gridDim.z; in kernel()
33 gridDim.z = 0; // expected-error {{no setter defined for property 'z'}} in kernel()
/external/llvm-project/clang/test/SemaCUDA/
cuda-builtin-vars.cu:28  out[i++] = gridDim.x; in kernel()
29 gridDim.x = 0; // expected-error {{no setter defined for property 'x'}} in kernel()
30 out[i++] = gridDim.y; in kernel()
31 gridDim.y = 0; // expected-error {{no setter defined for property 'y'}} in kernel()
32 out[i++] = gridDim.z; in kernel()
33 gridDim.z = 0; // expected-error {{no setter defined for property 'z'}} in kernel()
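
The two SemaCUDA tests above (the older clang copy and the llvm-project copy) exercise the same rule: the CUDA built-in variables are readable but not assignable. A minimal sketch of what Clang accepts and rejects (the kernel name is illustrative, not taken from the tests):

__global__ void read_only_builtins(unsigned *out) {
  // Reading a component of gridDim is fine; it behaves like a getter-only property.
  out[0] = gridDim.x;
  // Assigning would be rejected with "no setter defined for property 'x'":
  // gridDim.x = 1;
}
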
/external/tensorflow/tensorflow/stream_executor/cuda/
cudart_stub.cc:114  extern cudaError_t CUDARTAPI __cudaPopCallConfiguration(dim3 *gridDim, in __cudaPopCallConfiguration() argument
118 using FuncPtr = cudaError_t(CUDARTAPI *)(dim3 * gridDim, dim3 * blockDim, in __cudaPopCallConfiguration()
122 return func_ptr(gridDim, blockDim, sharedMem, stream); in __cudaPopCallConfiguration()
126 dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0) { in __cudaPushCallConfiguration() argument
127 using FuncPtr = unsigned(CUDARTAPI *)(dim3 gridDim, dim3 blockDim, in __cudaPushCallConfiguration()
131 return func_ptr(gridDim, blockDim, sharedMem, stream); in __cudaPushCallConfiguration()
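These stubs forward the launch configuration that the <<<...>>> syntax records before a kernel runs; the dim3 passed as the grid is what the kernel later observes as gridDim. A small sketch assuming a standard CUDA toolchain (names are illustrative, not from cudart_stub.cc):

#include <cstdio>

__global__ void report_grid() {
  // gridDim mirrors the dim3 grid argument of the launch configuration.
  if (blockIdx.x == 0 && threadIdx.x == 0)
    printf("gridDim = (%u, %u, %u)\n", gridDim.x, gridDim.y, gridDim.z);
}

int main() {
  dim3 grid(4, 2, 1), block(128);
  report_grid<<<grid, block>>>();  // prints "gridDim = (4, 2, 1)"
  cudaDeviceSynchronize();
  return 0;
}
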
/external/llvm-project/clang/test/CodeGenCUDA/
cuda-builtin-vars.cu:21  out[i++] = gridDim.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() in kernel()
22 out[i++] = gridDim.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() in kernel()
23 out[i++] = gridDim.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() in kernel()
/external/clang/test/CodeGenCUDA/
cuda-builtin-vars.cu:21  out[i++] = gridDim.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() in kernel()
22 out[i++] = gridDim.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() in kernel()
23 out[i++] = gridDim.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() in kernel()
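
The CodeGen tests pin down how those reads are lowered: with the NVPTX backend, each gridDim component becomes a read of the %nctaid special register through the matching llvm.nvvm intrinsic. A short kernel touching all three components (illustrative, not part of the tests):

__global__ void grid_extent(unsigned *out) {
  // Each read below is expected to lower to
  // @llvm.nvvm.read.ptx.sreg.nctaid.{x,y,z}, per the CHECK lines above.
  out[0] = gridDim.x * gridDim.y * gridDim.z;  // total number of blocks in the grid
}
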
/external/eigen/unsupported/Eigen/CXX11/src/Tensor/
TensorReductionCuda.h:116  const Index num_threads = blockDim.x * gridDim.x; in ReductionInitKernel()
130 if (gridDim.x == 1) { in FullReductionKernel()
158 eigen_assert(gridDim.x == 1 || *semaphore >= 2u); in FullReductionKernel()
178 if (gridDim.x > 1 && threadIdx.x == 0) { in FullReductionKernel()
180 atomicInc(semaphore, gridDim.x + 1); in FullReductionKernel()
193 eigen_assert(gridDim.x == 1); in ReductionInitFullReduxKernelHalfFloat()
206 const Index num_threads = blockDim.x * gridDim.x; in ReductionInitKernelHalfFloat()
226 if (gridDim.x == 1 && first_index == 0) { in FullReductionKernelHalfFloat()
256 if (gridDim.x == 1 && first_index == 0) { in FullReductionKernelHalfFloat()
380 eigen_assert(gridDim.y == 1);
[all …]
/external/tensorflow/tensorflow/core/kernels/
debug_ops_gpu.cu.cc:42  const int32 total_thread_count = gridDim.x * blockDim.x; in CurtHealthKernel()
60 const int32 total_thread_count = gridDim.x * blockDim.x; in ConciseHealthKernel()
91 const int32 total_thread_count = gridDim.x * blockDim.x; in FullHealthKernel()
134 const int32 total_thread_count = gridDim.x * blockDim.x; in ReduceInfNanThreeSlotsKernel()
concat_lib_gpu_impl.cu.cc:43  for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in concat_fixed_kernel()
50 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) { in concat_fixed_kernel()
97 for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in concat_variable_kernel()
109 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) in concat_variable_kernel()
check_numerics_op_gpu.cu.cc:42  const int32 total_thread_count = gridDim.x * blockDim.x; in CheckNumericsKernel()
65 const int32 total_thread_count = gridDim.x * blockDim.x; in CheckNumericsKernelV2()
random_op_gpu.h:147  const int32 total_thread_count = gridDim.x * blockDim.x;
189 const int32 total_thread_count = gridDim.x * blockDim.x;
split_lib_gpu.cu.cc:148  for (; gidx < total_cols; gidx += blockDim.x * gridDim.x) { in split_v_kernel()
160 for (; gidy < total_rows; gidy += blockDim.y * gridDim.y) in split_v_kernel()
softmax_op_gpu.cu.cc:90  tid += gridDim.x * blockDim.x; in GenerateNormalizedProb()
105 tid += gridDim.x * blockDim.x; in GenerateNormalizedProb()
stateful_random_ops_gpu.cu.cc:56  auto total_thread_count = gridDim.x * blockDim.x; in FillKernel()
reduction_gpu_kernels.cu.h:171  const int stride = blockDim.x * gridDim.x;
303 row += rows_per_warp * gridDim.y * blockDim.y;
304 for (; row < num_rows; row += rows_per_warp * gridDim.y * blockDim.y) {
333 out[col * gridDim.y + blockIdx.y] = s;
364 row += gridDim.y * blockDim.y;
367 for (; row < num_rows; row += gridDim.y * blockDim.y) {
397 out[col * gridDim.y + blockIdx.y] = s;
/external/llvm-project/clang/test/CodeGenCUDA/Inputs/
cuda.h:25  extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
38 extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
/external/llvm-project/clang/test/SemaCUDA/Inputs/
cuda.h:28  extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
42 extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
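
Both headers are trimmed-down test inputs, but the gridDim parameter matches the real runtime entry point: cudaLaunchKernel takes the grid and block dimensions as dim3 values instead of the <<<...>>> syntax. A hedged sketch of launching through that API (the kernel and helper names are invented for illustration):

#include <cuda_runtime.h>

__global__ void scale(float *data, int n, float factor) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] *= factor;
}

cudaError_t launch_scale(float *d_data, int n, float factor, cudaStream_t stream) {
  dim3 blockDim(256);
  dim3 gridDim((n + blockDim.x - 1) / blockDim.x);  // enough blocks to cover n elements
  void *args[] = {&d_data, &n, &factor};            // kernel arguments, passed by address
  return cudaLaunchKernel((const void *)scale, gridDim, blockDim, args,
                          /*sharedMem=*/0, stream);
}
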
/external/llvm-project/mlir/test/Transforms/
parametric-mapping.mlir:38  // stepXgdimx = step * gridDim.x
41 // new_step = step * gridDim.x * blockDim.x
/external/tensorflow/tensorflow/tools/ci_build/builds/user_ops/
cuda_op_kernel.cu.cc:23  i += blockDim.x * gridDim.x) { in AddOneKernel()
/external/tensorflow/tensorflow/examples/adding_an_op/
cuda_op_kernel.cu.cc:24  i += blockDim.x * gridDim.x) { in AddOneKernel()
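
Both AddOneKernel copies, like the TensorFlow and Eigen reduction kernels earlier in the list, use the grid-stride loop idiom: blockDim.x * gridDim.x is the total number of threads in the grid, so advancing by that stride lets a fixed-size grid cover any number of elements. A minimal sketch of the pattern (names are illustrative):

__global__ void AddOne(const int *in, int *out, int n) {
  // Start at this thread's global index and hop by the grid's total thread count.
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += blockDim.x * gridDim.x) {
    out[i] = in[i] + 1;
  }
}
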
/external/tensorflow/tensorflow/core/util/
gpu_device_functions.h:168  /*delta=*/gridDim.x * blockDim.x, /*end=*/count); in GpuGridRangeX()
178 /*delta=*/gridDim.y * blockDim.y, /*end=*/count); in GpuGridRangeY()
188 /*delta=*/gridDim.z * blockDim.z, /*end=*/count); in GpuGridRangeZ()
564 assert(blockDim.x * gridDim.x / blockDim.x == gridDim.x); in SetZero()
576 assert(blockDim.x * gridDim.x / blockDim.x == gridDim.x); in SetToValue()
/external/clang/lib/Headers/
cuda_builtin_vars.h:115  __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; variable
/external/llvm-project/clang/lib/Headers/
__clang_cuda_builtin_vars.h:109  __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; variable
__clang_cuda_runtime_wrapper.h:437  extern "C" unsigned __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim,
/external/llvm-project/openmp/libomptarget/deviceRTLs/nvptx/src/
target_impl.h:208  INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; } in GetNumberOfBlocksInKernel()
/external/tensorflow/tensorflow/core/kernels/image/
adjust_hsv_gpu.cu.h:103  idx < number_elements; idx += blockDim.x * gridDim.x * 3) { in adjust_hsv_nhwc()
