Searched refs:threadIdx (Results 1 – 14 of 14) sorted by relevance
67 const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z; in EigenContractionKernelInternal()68 const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x; in EigenContractionKernelInternal()97 const Index load_idx_vert = threadIdx.x + 8 * threadIdx.y; in EigenContractionKernelInternal()121 const Index lhs_horiz_0 = base_k + threadIdx.z + 0 * 8; \ in EigenContractionKernelInternal()122 const Index lhs_horiz_1 = base_k + threadIdx.z + 1 * 8; \ in EigenContractionKernelInternal()123 const Index lhs_horiz_2 = base_k + threadIdx.z + 2 * 8; \ in EigenContractionKernelInternal()124 const Index lhs_horiz_3 = base_k + threadIdx.z + 3 * 8; \ in EigenContractionKernelInternal()125 const Index lhs_horiz_4 = base_k + threadIdx.z + 4 * 8; \ in EigenContractionKernelInternal()126 const Index lhs_horiz_5 = base_k + threadIdx.z + 5 * 8; \ in EigenContractionKernelInternal()127 const Index lhs_horiz_6 = base_k + threadIdx.z + 6 * 8; \ in EigenContractionKernelInternal()[all …]
115 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernel()129 const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; in FullReductionKernel()136 if (threadIdx.x == 0) { in FullReductionKernel()174 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernel()178 if (gridDim.x > 1 && threadIdx.x == 0) { in FullReductionKernel()205 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; in ReductionInitKernelHalfFloat()223 const Index first_index = blockIdx.x * BlockSize * NumPerThread + 2*threadIdx.x; in FullReductionKernelHalfFloat()250 if ((threadIdx.x & (warpSize - 1)) == 0) { in FullReductionKernelHalfFloat()265 eigen_assert(threadIdx.x == 1); in ReductionCleanupKernelHalfFloat()390 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;[all …]
587 for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) {590 const int plane_kernel_offset = threadIdx.y * num_x_input;592 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {603 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {641 for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) {644 const int plane_kernel_offset = threadIdx.z * num_y_input;648 for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {651 for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {663 for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {665 for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {[all …]
22 assert(threadIdx.z == 0); in get_random_seed()24 blockIdx.x * blockDim.x + threadIdx.x + in get_random_seed()25 gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); in get_random_seed()
239 const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
253 Index val = threadIdx.x + blockIdx.x * blockDim.x;
7 out[i++] = threadIdx.x; in kernel()8 threadIdx.x = 0; // expected-error {{no setter defined for property 'x'}} in kernel()9 out[i++] = threadIdx.y; in kernel()10 threadIdx.y = 0; // expected-error {{no setter defined for property 'y'}} in kernel()11 out[i++] = threadIdx.z; in kernel()12 threadIdx.z = 0; // expected-error {{no setter defined for property 'z'}} in kernel()43 …__cuda_builtin_threadIdx_t y = threadIdx; // expected-error {{calling a private constructor of cla… in kernel()46 …threadIdx = threadIdx; // expected-error {{'operator=' is a private member of '__cuda_builtin_thre… in kernel()49 …void *ptr = &threadIdx; // expected-error {{'operator&' is a private member of '__cuda_builtin_thr… in kernel()
9 out[i++] = threadIdx.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x() in kernel()10 out[i++] = threadIdx.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y() in kernel()11 out[i++] = threadIdx.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z() in kernel()
6 ; return (n < 0 ? a + threadIdx.x : b + threadIdx.x)26 ; if (threadIdx.x < 5) // divergent: data dependent45 ; if (threadIdx.x >= 5) { // divergent48 ; // c here is divergent because it is sync dependent on threadIdx.x >= 5
11 dim3 threadIdx, blockDim, blockIdx; variable26 int i = threadIdx.x + blockIdx.x*blockDim.x; in run_on_cuda_meta_kernel()
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; variable
458 int threadIdx = cvGetThreadNum(); in cvCalcOpticalFlowPyrLK() local459 float* patchI = _patchI[threadIdx]; in cvCalcOpticalFlowPyrLK()460 float* patchJ = _patchJ[threadIdx]; in cvCalcOpticalFlowPyrLK()461 float* Ix = _Ix[threadIdx]; in cvCalcOpticalFlowPyrLK()462 float* Iy = _Iy[threadIdx]; in cvCalcOpticalFlowPyrLK()
68 y[threadIdx.x] = a * x[threadIdx.x];
565 ``i32 @llvm.nvvm.read.ptx.sreg.tid.{x,y,z}`` threadIdx.{x,y,z}