/external/tensorflow/tensorflow/core/kernels/ |
D | reduction_gpu_kernels.cu.h | 509 const cudaStream_t& cu_stream) { 516 num_threads, 0, cu_stream, in, out, in_size, op, init)); 536 num_blocks, num_threads, 0, cu_stream, in, 545 cu_stream, 555 in_size, op, init, cu_stream); 576 const cudaStream_t& cu_stream) { 583 threads_per_block, 0, cu_stream, in, out, 598 transform_iter + 1, op, init, cu_stream); 619 const cudaStream_t& cu_stream) { 637 cu_stream, in, out, extent_x, extent_y, op, init)); [all …]
|
D | where_op_gpu.cu.h | 142 const cudaStream_t& cu_stream = GetCudaStream(ctx); 155 /*stream*/ cu_stream); 174 /*stream*/ cu_stream); 269 const cudaStream_t& cu_stream = GetCudaStream(ctx); 292 /*stream*/ cu_stream); 311 /*stream*/ cu_stream);
|
D | dynamic_partition_op_gpu.cu.cc | 329 const cudaStream_t& cu_stream = GetCudaStream(c); in RadixSort() local 343 indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream); in RadixSort() 355 0, sizeof(int32) * 8, cu_stream); in RadixSort() 362 const cudaStream_t& cu_stream = GetCudaStream(c); in CountAndSortParts() local 413 num_runs_ptr, reduction_op, N, cu_stream); in CountAndSortParts() 428 num_runs_ptr, reduction_op, N, cu_stream); in CountAndSortParts()
|
D | topk_op_gpu.h | 438 const cudaStream_t& cu_stream = GetCudaStream(ctx); 487 /* stream */ cu_stream); 512 /* stream */ cu_stream); 552 const cudaStream_t& cu_stream = GetCudaStream(context); 553 auto err = impl::LaunchTopKKernel(cu_stream, /* num_shards */ 0,
|
D | softmax_op_gpu.cu.cc | 147 const cudaStream_t& cu_stream = GetCudaStream(context); in Compute() local 185 cu_stream, reinterpret_cast<const T*>(logits_in_.flat<T>().data()), in Compute()
|
/external/tensorflow/tensorflow/contrib/image/kernels/ |
D | adjust_hsv_in_yiq_op_gpu.cu.cc | 50 auto* cu_stream = ctx->eigen_device<GPUDevice>().stream(); in operator ()() local 51 OP_REQUIRES(ctx, cu_stream, errors::Internal("No GPU stream available.")); in operator ()() 59 1, 0, cu_stream, delta_h, scale_s, scale_v, in operator ()()
|
/external/tensorflow/tensorflow/core/nccl/ |
D | nccl_manager.cc | 544 const cudaStream_t* cu_stream = reinterpret_cast<const cudaStream_t*>( in LoopKernelLaunches() local 578 << " cuda_stream " << cu_stream; in LoopKernelLaunches() 581 nccl_comm, *cu_stream); in LoopKernelLaunches() 588 collective->root_rank, nccl_comm, *cu_stream); in LoopKernelLaunches() 598 collective->root_rank, nccl_comm, *cu_stream); in LoopKernelLaunches() 610 << cu_stream; in LoopKernelLaunches() 612 data_type, nccl_comm, *cu_stream); in LoopKernelLaunches()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | nccl_all_reduce_thunk.cc | 286 cudaStream_t* cu_stream = reinterpret_cast<cudaStream_t*>( in DoAllReduce() local 288 VLOG(3) << "Using stream pointer: " << cu_stream in DoAllReduce() 297 /*stream=*/*cu_stream); in DoAllReduce()
|
/external/tensorflow/tensorflow/contrib/rnn/kernels/ |
D | lstm_ops_gpu.cu.cc | 235 const cudaStream_t& cu_stream = GetCudaStream(ctx); in LSTMBlockCellFpropWithCUDA() local 245 TF_CHECK_OK(CudaLaunchKernel(concat_xh<T>, grid_dim, block_dim, 0, cu_stream, in LSTMBlockCellFpropWithCUDA() 266 lstm_gates<T, true>, grid_dim_2d, block_dim_2d, 0, cu_stream, in LSTMBlockCellFpropWithCUDA() 272 lstm_gates<T, false>, grid_dim_2d, block_dim_2d, 0, cu_stream, in LSTMBlockCellFpropWithCUDA() 374 const cudaStream_t& cu_stream = GetCudaStream(ctx); in LSTMBlockCellBpropWithCUDA() local 381 lstm_gates_bprop<T>, grid_dim_2d, block_dim_2d, 0, cu_stream, in LSTMBlockCellBpropWithCUDA()
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | cuda_dnn.cc | 187 CUstream cu_stream = stream ? AsGpuStreamValue(stream) : cudaStreamLegacy; in GetHandle() local 188 const auto status = cudnnSetStream(handle_, cu_stream); in GetHandle()
|