Home
last modified time | relevance | path

Searched refs:cu_stream (Results 1 – 12 of 12) sorted by relevance

/external/tensorflow/tensorflow/compiler/xla/service/gpu/
Dnccl_all_to_all_thunk.cc68 cudaStream_t* cu_stream = reinterpret_cast<cudaStream_t*>( in RunNcclCollective() local
102 *cu_stream)); in RunNcclCollective()
105 *cu_stream)); in RunNcclCollective()
127 *cu_stream)); in RunNcclCollective()
130 *cu_stream)); in RunNcclCollective()
Dnccl_all_gather_thunk.cc76 cudaStream_t* cu_stream = reinterpret_cast<cudaStream_t*>( in RunNcclCollective() local
96 static_cast<const void*>(comm), cu_stream); in RunNcclCollective()
100 *cu_stream)); in RunNcclCollective()
Dnccl_all_reduce_thunk.cc127 cudaStream_t* cu_stream = reinterpret_cast<cudaStream_t*>( in RunNcclCollective() local
147 static_cast<const void*>(comm), cu_stream); in RunNcclCollective()
151 reduce_op, comm, *cu_stream)); in RunNcclCollective()
/external/tensorflow/tensorflow/core/kernels/
Dreduction_gpu_kernels.cu.h651 const gpuStream_t& cu_stream) {
657 num_blocks, num_threads, 0, cu_stream, in, out,
679 num_blocks, num_threads, 0, cu_stream, in,
688 TF_RED_WARPSIZE, 0, cu_stream,
698 out, in_size, op, init, cu_stream);
719 const gpuStream_t& cu_stream) {
726 threads_per_block, 0, cu_stream, in, out,
742 transform_iter + 1, op, init, cu_stream);
763 const gpuStream_t& cu_stream) {
783 grid_dim, block_dim, 0, cu_stream, in, out,
[all …]
Dwhere_op_gpu.cu.h140 const auto& cu_stream = GetGpuStream(ctx);
153 /*stream*/ cu_stream);
172 /*stream*/ cu_stream);
267 const auto& cu_stream = GetGpuStream(ctx);
290 /*stream*/ cu_stream);
310 /*stream*/ cu_stream);
Ddynamic_partition_op_gpu.cu.cc339 const auto& cu_stream = GetGpuStream(c); in RadixSort() local
353 indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream); in RadixSort()
365 0, sizeof(int32) * 8, cu_stream); in RadixSort()
372 const auto& cu_stream = GetGpuStream(c); in CountAndSortParts() local
429 aggregates_out_it, num_runs_ptr, reduction_op, N, cu_stream); in CountAndSortParts()
444 N, cu_stream); in CountAndSortParts()
Dtopk_op_gpu.h456 const auto& cu_stream = GetGpuStream(ctx);
505 /* stream */ cu_stream);
530 /* stream */ cu_stream);
570 const auto& cu_stream = GetGpuStream(context);
571 auto err = impl::LaunchTopKKernel(cu_stream, /* num_shards */ 0,
Dsoftmax_op_gpu.cu.cc213 const auto& cu_stream = GetGpuStream(context); in Compute() local
258 numThreadsPerBlock, 0, cu_stream, in Compute()
270 numThreadsPerBlock, 0, cu_stream, in Compute()
/external/tensorflow/tensorflow/core/nccl/
Dnccl_manager.cc705 const cudaStream_t* cu_stream = reinterpret_cast<const cudaStream_t*>( in LoopKernelLaunches() local
745 << " cuda_stream " << cu_stream; in LoopKernelLaunches()
754 nccl_comm, *cu_stream); in LoopKernelLaunches()
782 << " cuda_stream " << cu_stream; in LoopKernelLaunches()
791 collective->root_rank, nccl_comm, *cu_stream); in LoopKernelLaunches()
807 collective->root_rank, nccl_comm, *cu_stream); in LoopKernelLaunches()
820 << " cuda_stream " << cu_stream; in LoopKernelLaunches()
828 data_type, nccl_comm, *cu_stream); in LoopKernelLaunches()
/external/tensorflow/tensorflow/core/kernels/rnn/
Dlstm_ops_gpu.cu.cc240 const auto& cu_stream = GetGpuStream(ctx); in LSTMBlockCellFpropWithCUDA() local
250 TF_CHECK_OK(GpuLaunchKernel(concat_xh<T>, grid_dim, block_dim, 0, cu_stream, in LSTMBlockCellFpropWithCUDA()
272 cu_stream, gates.data(), b.data(), cs_prev.data(), wci.data(), in LSTMBlockCellFpropWithCUDA()
279 cu_stream, gates.data(), b.data(), cs_prev.data(), wci.data(), in LSTMBlockCellFpropWithCUDA()
381 const auto& cu_stream = GetGpuStream(ctx); in LSTMBlockCellBpropWithCUDA() local
388 lstm_gates_bprop<T, gate_layout>, grid_dim_2d, block_dim_2d, 0, cu_stream, in LSTMBlockCellBpropWithCUDA()
/external/tensorflow/tensorflow/core/kernels/sparse/
Dkernels_gpu.cu.cc56 const auto& cu_stream = GetGpuStream(c); in operator ()() local
85 /*stream*/ cu_stream); in operator ()()
109 /*stream*/ cu_stream); in operator ()()
/external/tensorflow/tensorflow/stream_executor/cuda/
Dcuda_dnn.cc187 CUstream cu_stream = stream ? AsGpuStreamValue(stream) : cudaStreamLegacy; in GetHandle() local
188 const auto status = cudnnSetStream(handle_, cu_stream); in GetHandle()