Searched refs:threadIdx (Results 1 – 25 of 59) sorted by relevance

/external/eigen/unsupported/Eigen/CXX11/src/Tensor/
TensorContractionCuda.h
67 const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z; in EigenContractionKernelInternal()
68 const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x; in EigenContractionKernelInternal()
97 const Index load_idx_vert = threadIdx.x + 8 * threadIdx.y; in EigenContractionKernelInternal()
121 const Index lhs_horiz_0 = base_k + threadIdx.z + 0 * 8; \ in EigenContractionKernelInternal()
122 const Index lhs_horiz_1 = base_k + threadIdx.z + 1 * 8; \ in EigenContractionKernelInternal()
123 const Index lhs_horiz_2 = base_k + threadIdx.z + 2 * 8; \ in EigenContractionKernelInternal()
124 const Index lhs_horiz_3 = base_k + threadIdx.z + 3 * 8; \ in EigenContractionKernelInternal()
125 const Index lhs_horiz_4 = base_k + threadIdx.z + 4 * 8; \ in EigenContractionKernelInternal()
126 const Index lhs_horiz_5 = base_k + threadIdx.z + 5 * 8; \ in EigenContractionKernelInternal()
127 const Index lhs_horiz_6 = base_k + threadIdx.z + 6 * 8; \ in EigenContractionKernelInternal()
[all …]
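
Note: the Eigen contraction matches above all linearize the three threadIdx components into shared-memory offsets for an 8x8x8 thread block. A minimal sketch of the same idiom with illustrative names (not Eigen's actual code); the padded strides (9 rather than 8, and 72 = 8 * 9) are the usual shared-memory bank-conflict workaround:

    // Linearize a 3-D thread index within an 8x8x8 block.
    __global__ void store_index_demo(int* out) {
      // y picks a 72-element slab, x a 9-element row, z the element;
      // the extra padding slot per row keeps banks from colliding.
      const int lhs_idx = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z;
      out[lhs_idx] = lhs_idx;   // out must hold at least 575 ints
    }
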
TensorReductionCuda.h
115 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernel()
129 const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; in FullReductionKernel()
136 if (threadIdx.x == 0) { in FullReductionKernel()
174 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernel()
178 if (gridDim.x > 1 && threadIdx.x == 0) { in FullReductionKernel()
205 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernelHalfFloat()
223 const Index first_index = blockIdx.x * BlockSize * NumPerThread + 2*threadIdx.x; in FullReductionKernelHalfFloat()
250 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernelHalfFloat()
265 eigen_assert(threadIdx.x == 1); in ReductionCleanupKernelHalfFloat()
390 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
[all …]
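
Note: the reduction matches combine three recurring guards: a global thread id, a "lane 0 of each warp" test via (threadIdx.x & (warpSize - 1)) == 0, and a "thread 0" test. A minimal sketch of how they cooperate in a full reduction (hypothetical names, not Eigen's actual FullReductionKernel):

    __global__ void full_reduce_sketch(const float* in, float* out, int n) {
      const int tid = blockIdx.x * blockDim.x + threadIdx.x;
      float v = (tid < n) ? in[tid] : 0.0f;
      // Shuffle values down the warp; lane 0 ends up with the warp sum.
      for (int offset = warpSize / 2; offset > 0; offset /= 2)
        v += __shfl_down_sync(0xffffffff, v, offset);
      if ((threadIdx.x & (warpSize - 1)) == 0)
        atomicAdd(out, v);   // one atomic per warp instead of per thread
    }
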
TensorConvolution.h
587 for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) {
590 const int plane_kernel_offset = threadIdx.y * num_x_input;
592 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
603 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
641 for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) {
644 const int plane_kernel_offset = threadIdx.z * num_y_input;
648 for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
651 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
663 for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
665 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
[all …]
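
Note: the convolution matches are all instances of the block-strided cooperative loop "for (i = threadIdx.x; i < n; i += blockDim.x)", which lets a block of any size stage an arbitrary-length buffer into shared memory. A self-contained sketch with illustrative names:

    __global__ void staged_copy(const float* in, float* out, int n) {
      extern __shared__ float tile[];   // pass n * sizeof(float) at launch
      for (int i = threadIdx.x; i < n; i += blockDim.x)
        tile[i] = in[i];                // each thread takes every blockDim.x-th slot
      __syncthreads();                  // tile is complete before anyone reads
      for (int i = threadIdx.x; i < n; i += blockDim.x)
        out[i] = tile[i];
    }
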
TensorRandom.h
22 assert(threadIdx.z == 0); in get_random_seed()
24 blockIdx.x * blockDim.x + threadIdx.x + in get_random_seed()
25 gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); in get_random_seed()
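
Note: get_random_seed derives a per-thread seed from a globally unique linear id over a 2-D launch (hence the assert that threadIdx.z is unused). The id computation above, restated as a stand-alone helper (hypothetical name):

    __device__ unsigned global_thread_id_2d() {
      const unsigned col = blockIdx.x * blockDim.x + threadIdx.x;
      const unsigned row = blockIdx.y * blockDim.y + threadIdx.y;
      return col + gridDim.x * blockDim.x * row;   // row-major over the whole grid
    }
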
/external/clang/test/SemaCUDA/
cuda-builtin-vars.cu
7 out[i++] = threadIdx.x; in kernel()
8 threadIdx.x = 0; // expected-error {{no setter defined for property 'x'}} in kernel()
9 out[i++] = threadIdx.y; in kernel()
10 threadIdx.y = 0; // expected-error {{no setter defined for property 'y'}} in kernel()
11 out[i++] = threadIdx.z; in kernel()
12 threadIdx.z = 0; // expected-error {{no setter defined for property 'z'}} in kernel()
43 __cuda_builtin_threadIdx_t y = threadIdx; // expected-error {{calling a private constructor of class '__cuda_builtin_threadIdx_t'}} in kernel()
46 threadIdx = threadIdx; // expected-error {{'operator=' is a private member of '__cuda_builtin_threadIdx_t'}} in kernel()
49 void *ptr = &threadIdx; // expected-error {{'operator&' is a private member of '__cuda_builtin_threadIdx_t'}} in kernel()
/external/llvm-project/clang/test/SemaCUDA/
cuda-builtin-vars.cu
7 out[i++] = threadIdx.x; in kernel()
8 threadIdx.x = 0; // expected-error {{no setter defined for property 'x'}} in kernel()
9 out[i++] = threadIdx.y; in kernel()
10 threadIdx.y = 0; // expected-error {{no setter defined for property 'y'}} in kernel()
11 out[i++] = threadIdx.z; in kernel()
12 threadIdx.z = 0; // expected-error {{no setter defined for property 'z'}} in kernel()
43 __cuda_builtin_threadIdx_t y = threadIdx; // expected-error {{calling a private constructor of class '__cuda_builtin_threadIdx_t'}} in kernel()
46 threadIdx = threadIdx; // expected-error {{'operator=' is a private member of '__cuda_builtin_threadIdx_t'}} in kernel()
49 void *ptr = &threadIdx; // expected-error {{'operator&' is a private member of '__cuda_builtin_threadIdx_t'}} in kernel()
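
Note: both copies of this Sema test pin down the same contract: clang models threadIdx as an object of the builtin type __cuda_builtin_threadIdx_t whose components are read-only properties, so reads compile while writes, assignment, and address-taking are rejected. A minimal illustration (the commented-out lines reproduce the diagnostics checked above):

    __global__ void read_only_demo(unsigned* out) {
      out[0] = threadIdx.x + threadIdx.y + threadIdx.z;   // OK: getters exist
      // threadIdx.x = 0;      // error: no setter defined for property 'x'
      // void* p = &threadIdx; // error: 'operator&' is a private member
    }
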
/external/angle/src/tests/gl_tests/
VulkanMultithreadingTest.cpp
75 for (size_t threadIdx = 0; threadIdx < threadCount; threadIdx++) in runMultithreadedGLTest() local
77 threads[threadIdx] = std::thread([&, threadIdx]() { in runMultithreadedGLTest()
98 testBody(surface, threadIdx); in runMultithreadedGLTest()
MultithreadingTest.cpp
55 for (size_t threadIdx = 0; threadIdx < threadCount; threadIdx++) in runMultithreadedGLTest() local
57 threads[threadIdx] = std::thread([&, threadIdx]() { in runMultithreadedGLTest()
78 testBody(surface, threadIdx); in runMultithreadedGLTest()
470 for (size_t threadIdx = 0; threadIdx < kThreadCount; threadIdx++) in TEST_P() local
472 threads[threadIdx] = std::thread([&, threadIdx]() { in TEST_P()
473 contexts[threadIdx] = EGL_NO_CONTEXT; in TEST_P()
475 contexts[threadIdx] = window->createContext(EGL_NO_CONTEXT); in TEST_P()
476 EXPECT_NE(EGL_NO_CONTEXT, contexts[threadIdx]); in TEST_P()
486 EXPECT_TRUE(eglDestroyContext(dpy, contexts[threadIdx])); in TEST_P()
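
Note: the two ANGLE tests are unrelated to the CUDA builtin; here threadIdx is an ordinary host loop variable. The detail worth copying is the capture list "[&, threadIdx]", which captures the loop index by value so each std::thread gets a stable copy instead of racing on the shared counter. A minimal sketch:

    #include <thread>
    #include <vector>

    void run_threads(size_t threadCount) {
      std::vector<std::thread> threads(threadCount);
      for (size_t threadIdx = 0; threadIdx < threadCount; threadIdx++)
        threads[threadIdx] = std::thread([&, threadIdx]() {
          // per-thread body; threadIdx is this thread's own copy
        });
      for (auto& t : threads) t.join();
    }
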
/external/llvm-project/clang/test/CodeGenCUDA/
cuda-builtin-vars.cu
9 out[i++] = threadIdx.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x() in kernel()
10 out[i++] = threadIdx.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y() in kernel()
11 out[i++] = threadIdx.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z() in kernel()
/external/clang/test/CodeGenCUDA/
cuda-builtin-vars.cu
9 out[i++] = threadIdx.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x() in kernel()
10 out[i++] = threadIdx.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y() in kernel()
11 out[i++] = threadIdx.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z() in kernel()
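
Note: both CodeGen tests verify the lowering contract: each read of a threadIdx component becomes one NVVM intrinsic call. Restated as a one-line kernel with the emitted intrinsic in a comment:

    __global__ void tid_read(unsigned* out) {
      out[0] = threadIdx.x;   // lowers to: call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
    }
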
/external/tensorflow/tensorflow/core/kernels/
concat_lib_gpu_impl.cu.cc
41 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in concat_fixed_kernel()
44 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in concat_fixed_kernel()
69 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in concat_variable_kernel()
77 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; in concat_variable_kernel()
108 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in concat_variable_kernel()
bias_op_gpu.cu.cc
133 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { in BiasGradNHWC_SharedAtomics()
138 for (int32 index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; in BiasGradNHWC_SharedAtomics()
145 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { in BiasGradNHWC_SharedAtomics()
158 for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) { in BiasGradNCHW_SharedAtomics()
169 for (int32 index = group_index * blockDim.x + threadIdx.x; in BiasGradNCHW_SharedAtomics()
180 int bias_offset = threadIdx.x % 32; in BiasGradNCHW_SharedAtomics()
186 int32 thread_index = threadIdx.x; in BiasGradNCHW_SharedAtomics()
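
Note: the BiasGrad*_SharedAtomics matches follow a three-phase shape: zero a shared accumulator with a block-strided loop, atomically add contributions into it, then flush partial sums to global memory. A simplified sketch assuming an NHWC-style "index % bias_size" mapping (not TensorFlow's exact kernel):

    __global__ void bias_grad_sketch(const float* grad, float* bias_grad,
                                     int nthreads, int bias_size) {
      extern __shared__ float s_data[];   // bias_size floats at launch
      for (int i = threadIdx.x; i < bias_size; i += blockDim.x)
        s_data[i] = 0.0f;                 // phase 1: cooperative zero-fill
      __syncthreads();
      for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < nthreads;
           i += blockDim.x * gridDim.x)
        atomicAdd(&s_data[i % bias_size], grad[i]);   // phase 2: cheap shared atomics
      __syncthreads();
      for (int i = threadIdx.x; i < bias_size; i += blockDim.x)
        atomicAdd(&bias_grad[i], s_data[i]);          // phase 3: flush per block
    }
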
reduction_gpu_kernels.cu.h
168 const int tid = threadIdx.x;
208 int warp_index = threadIdx.x / TF_RED_WARPSIZE;
210 const int lane = threadIdx.x % TF_RED_WARPSIZE;
213 int gid = threadIdx.x + blockIdx.x * blockDim.x;
275 const int lane = threadIdx.x % TF_RED_WARPSIZE;
279 rows_per_warp * (blockIdx.y * blockDim.y + threadIdx.y);
314 sum, static_cast<int>(threadIdx.x + i * num_cols), 0xffffffff);
319 partial_sums[lane * (TF_RED_WARPSIZE + 1) + threadIdx.y] = sum;
323 if (threadIdx.y == 0 && threadIdx.x < num_cols) {
324 value_type s = partial_sums[threadIdx.x * (TF_RED_WARPSIZE + 1)];
[all …]
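
Note: the row-reduction matches split threadIdx.x into a (warp, lane) pair and pad each shared-memory row by one element (TF_RED_WARPSIZE + 1), which staggers rows across banks so the later column-wise reads stay conflict-free. The decomposition and padding in isolation (illustrative kernel, assuming 32-wide warps):

    __global__ void warp_lane_demo(float* out) {
      __shared__ float partial[32 * 33];      // 32 rows padded to 33
      const int warp = threadIdx.x / 32;      // which warp in the block
      const int lane = threadIdx.x % 32;      // position within the warp
      partial[lane * 33 + warp] = float(threadIdx.x);   // transposed store
      __syncthreads();
      if (warp == 0) out[lane] = partial[lane * 33];    // conflict-free read
    }
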
debug_ops_gpu.cu.cc
41 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in CurtHealthKernel()
59 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ConciseHealthKernel()
90 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in FullHealthKernel()
133 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReduceInfNanThreeSlotsKernel()
split_lib_gpu.cu.cc
120 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; in split_v_kernel()
128 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; in split_v_kernel()
159 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; in split_v_kernel()
check_numerics_op_gpu.cu.cc
41 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in CheckNumericsKernel()
64 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; in CheckNumericsKernelV2()
bucketize_op_gpu.cu.cc
45 int32 lidx = threadIdx.y * blockDim.x + threadIdx.x; in BucketizeCustomKernel()
/external/llvm-project/llvm/test/Analysis/DivergenceAnalysis/NVPTX/
diverge.ll
6 ; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
26 ; if (threadIdx.x < 5) // divergent: data dependent
45 ; if (threadIdx.x >= 5) { // divergent
48 ; // c here is divergent because it is sync dependent on threadIdx.x >= 5
/external/skqp/src/compute/hs/cuda/sm_35/
hs_cuda_macros.h
182 #define HS_GLOBAL_ID_X() (blockDim.x * blockIdx.x + threadIdx.x)
183 #define HS_LOCAL_ID_X() threadIdx.x
184 #define HS_WARP_ID_X() (threadIdx.x / 32)
185 #define HS_LANE_ID() (threadIdx.x & 31)
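
Note: the HS_* macros above encode the standard 1-D decomposition; since warps are 32 wide, "& 31" and "/ 32" are the cheap power-of-two forms of modulo and division. A usage sketch:

    __global__ void id_demo(int* out) {
      const int gid  = blockDim.x * blockIdx.x + threadIdx.x;  // HS_GLOBAL_ID_X()
      const int warp = threadIdx.x / 32;                       // HS_WARP_ID_X()
      const int lane = threadIdx.x & 31;                       // HS_LANE_ID()
      out[gid] = warp * 32 + lane;   // reconstructs threadIdx.x by construction
    }
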
/external/llvm-project/mlir/test/Transforms/
parametric-mapping.mlir
29 // threadIdx.x + blockIdx.x * blockDim.x
32 // thread_offset = step * (threadIdx.x + blockIdx.x * blockDim.x)
/external/llvm-project/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/
diverge.ll
6 ; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
26 ; if (threadIdx.x < 5) // divergent: data dependent
45 ; if (threadIdx.x >= 5) { // divergent
48 ; // c here is divergent because it is sync dependent on threadIdx.x >= 5
/external/llvm/test/Analysis/DivergenceAnalysis/NVPTX/
diverge.ll
6 ; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)
26 ; if (threadIdx.x < 5) // divergent: data dependent
45 ; if (threadIdx.x >= 5) { // divergent
48 ; // c here is divergent because it is sync dependent on threadIdx.x >= 5
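
Note: all three copies of diverge.ll test the same properties, and their comments spell out the CUDA-level source. Reconstructed from those comments (hypothetical function names):

    // Uniform branch on n, but the returned value is divergent:
    __device__ int select_case(int n, int a, int b) {
      return (n < 0) ? a + threadIdx.x : b + threadIdx.x;
    }

    // Lane-dependent branch; c becomes sync-dependent on the condition:
    __device__ int branch_case(int c) {
      if (threadIdx.x >= 5)   // divergent across the warp
        c += 1;
      return c;
    }
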
/external/tensorflow/tensorflow/core/kernels/image/
non_max_suppression_op.cu.cc
171 const int i = i_block_offset + threadIdx.x; in NMSKernel()
174 if (threadIdx.y == 0) { in NMSKernel()
177 shared_i_boxes[threadIdx.x] = box; in NMSKernel()
178 shared_i_areas[threadIdx.x] = (box.x2 - box.x1) * (box.y2 - box.y1); in NMSKernel()
183 kNmsBoxesPerThread * (blockIdx.y * blockDim.y + threadIdx.y); in NMSKernel()
200 const Box i_box = shared_i_boxes[threadIdx.x]; in NMSKernel()
202 if (OverThreshold<float>(&i_box, &j_box, shared_i_areas[threadIdx.x], in NMSKernel()
/external/eigen/test/
cuda_common.h
11 dim3 threadIdx, blockDim, blockIdx; variable
26 int i = threadIdx.x + blockIdx.x*blockDim.x; in run_on_cuda_meta_kernel()
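
Note: here the Eigen test harness declares host globals named after the CUDA builtins so the same kernel body can also run as plain CPU code (the real header gets dim3 from the CUDA runtime; the struct below is a self-contained stand-in for illustration):

    // Plain C++ translation unit: emulate the builtins by name.
    struct dim3 { unsigned x = 0, y = 0, z = 0; };
    dim3 threadIdx, blockDim, blockIdx;

    int linear_id() { return int(threadIdx.x + blockIdx.x * blockDim.x); }
    // A driver loop can set threadIdx.x / blockIdx.x and invoke the
    // kernel body once per emulated thread.
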
/external/tensorflow/tensorflow/tools/ci_build/builds/user_ops/
cuda_op_kernel.cu.cc
22 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; in AddOneKernel()
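
Note: the loop header in this match is cut off mid-line; it is the canonical grid-stride loop, where each thread starts at its global index and advances by the total number of launched threads, so any N is covered by any launch shape. Completed with the standard stride (assumed, since the listing truncates it):

    __global__ void AddOneKernel(const int* in, const int N, int* out) {
      for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
           i += blockDim.x * gridDim.x) {
        out[i] = in[i] + 1;
      }
    }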