Home
last modified time | relevance | path

Searched refs:cu_stream (Results 1 – 10 of 10) sorted by relevance

/external/tensorflow/tensorflow/core/kernels/
Dreduction_gpu_kernels.cu.h509 const cudaStream_t& cu_stream) {
516 num_threads, 0, cu_stream, in, out, in_size, op, init));
536 num_blocks, num_threads, 0, cu_stream, in,
545 cu_stream,
555 in_size, op, init, cu_stream);
576 const cudaStream_t& cu_stream) {
583 threads_per_block, 0, cu_stream, in, out,
598 transform_iter + 1, op, init, cu_stream);
619 const cudaStream_t& cu_stream) {
637 cu_stream, in, out, extent_x, extent_y, op, init));
[all …]
Dwhere_op_gpu.cu.h142 const cudaStream_t& cu_stream = GetCudaStream(ctx);
155 /*stream*/ cu_stream);
174 /*stream*/ cu_stream);
269 const cudaStream_t& cu_stream = GetCudaStream(ctx);
292 /*stream*/ cu_stream);
311 /*stream*/ cu_stream);
Ddynamic_partition_op_gpu.cu.cc329 const cudaStream_t& cu_stream = GetCudaStream(c); in RadixSort() local
343 indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream); in RadixSort()
355 0, sizeof(int32) * 8, cu_stream); in RadixSort()
362 const cudaStream_t& cu_stream = GetCudaStream(c); in CountAndSortParts() local
413 num_runs_ptr, reduction_op, N, cu_stream); in CountAndSortParts()
428 num_runs_ptr, reduction_op, N, cu_stream); in CountAndSortParts()
Dtopk_op_gpu.h438 const cudaStream_t& cu_stream = GetCudaStream(ctx);
487 /* stream */ cu_stream);
512 /* stream */ cu_stream);
552 const cudaStream_t& cu_stream = GetCudaStream(context);
553 auto err = impl::LaunchTopKKernel(cu_stream, /* num_shards */ 0,
Dsoftmax_op_gpu.cu.cc147 const cudaStream_t& cu_stream = GetCudaStream(context); in Compute() local
185 cu_stream, reinterpret_cast<const T*>(logits_in_.flat<T>().data()), in Compute()
/external/tensorflow/tensorflow/contrib/image/kernels/
Dadjust_hsv_in_yiq_op_gpu.cu.cc50 auto* cu_stream = ctx->eigen_device<GPUDevice>().stream(); in operator ()() local
51 OP_REQUIRES(ctx, cu_stream, errors::Internal("No GPU stream available.")); in operator ()()
59 1, 0, cu_stream, delta_h, scale_s, scale_v, in operator ()()
/external/tensorflow/tensorflow/core/nccl/
Dnccl_manager.cc544 const cudaStream_t* cu_stream = reinterpret_cast<const cudaStream_t*>( in LoopKernelLaunches() local
578 << " cuda_stream " << cu_stream; in LoopKernelLaunches()
581 nccl_comm, *cu_stream); in LoopKernelLaunches()
588 collective->root_rank, nccl_comm, *cu_stream); in LoopKernelLaunches()
598 collective->root_rank, nccl_comm, *cu_stream); in LoopKernelLaunches()
610 << cu_stream; in LoopKernelLaunches()
612 data_type, nccl_comm, *cu_stream); in LoopKernelLaunches()
/external/tensorflow/tensorflow/compiler/xla/service/gpu/
Dnccl_all_reduce_thunk.cc286 cudaStream_t* cu_stream = reinterpret_cast<cudaStream_t*>( in DoAllReduce() local
288 VLOG(3) << "Using stream pointer: " << cu_stream in DoAllReduce()
297 /*stream=*/*cu_stream); in DoAllReduce()
/external/tensorflow/tensorflow/contrib/rnn/kernels/
Dlstm_ops_gpu.cu.cc235 const cudaStream_t& cu_stream = GetCudaStream(ctx); in LSTMBlockCellFpropWithCUDA() local
245 TF_CHECK_OK(CudaLaunchKernel(concat_xh<T>, grid_dim, block_dim, 0, cu_stream, in LSTMBlockCellFpropWithCUDA()
266 lstm_gates<T, true>, grid_dim_2d, block_dim_2d, 0, cu_stream, in LSTMBlockCellFpropWithCUDA()
272 lstm_gates<T, false>, grid_dim_2d, block_dim_2d, 0, cu_stream, in LSTMBlockCellFpropWithCUDA()
374 const cudaStream_t& cu_stream = GetCudaStream(ctx); in LSTMBlockCellBpropWithCUDA() local
381 lstm_gates_bprop<T>, grid_dim_2d, block_dim_2d, 0, cu_stream, in LSTMBlockCellBpropWithCUDA()
/external/tensorflow/tensorflow/stream_executor/cuda/
Dcuda_dnn.cc187 CUstream cu_stream = stream ? AsGpuStreamValue(stream) : cudaStreamLegacy; in GetHandle() local
188 const auto status = cudnnSetStream(handle_, cu_stream); in GetHandle()