/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_

#include "absl/types/optional.h"
#include "absl/types/span.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_instructions.h"
#include "tensorflow/compiler/xla/status.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"

namespace xla {
namespace gpu {

// This file contains low-level routines for running cudnn convolutions.

struct RunConvOptions {
  // Nullable output parameter for profiling results.
  se::dnn::ProfileResult* profile_result = nullptr;

  // If set, use this algorithm instead of the one specified by the
  // instruction.
  absl::optional<se::dnn::AlgorithmDesc> algo_override;
};

// Calls into cudnn to run the specified convolution.
//
// We provide one overload that takes a scratch buffer and another that takes
// a scratch allocator, which is responsible for allocating the scratch space.
// In theory the second one shouldn't be necessary -- users of this function
// could just ask cudnn how much scratch space it needs for a particular
// convolution. But in practice, StreamExecutor does not expose such an API,
// and in the name of parsimony, perhaps it's better not to add one. Instead,
// the first time you run a convolution, call the version that takes a scratch
// allocator and note how much memory it uses. The next time you run the same
// conv, you can provide an explicitly preallocated scratch buffer of that
// size. (A usage sketch appears at the end of this file.)
Status RunCudnnConv(const HloCustomCallInstruction* conv,
                    absl::Span<se::DeviceMemoryBase> operand_buffers,
                    se::DeviceMemoryBase result_buffer,
                    se::DeviceMemoryBase scratch_buf, se::Stream* stream,
                    RunConvOptions = {});

Status RunCudnnConv(const HloCustomCallInstruction* conv,
                    absl::Span<se::DeviceMemoryBase> operand_buffers,
                    se::DeviceMemoryBase result_buffer,
                    se::ScratchAllocator* scratch_allocator, se::Stream* stream,
                    RunConvOptions = {});

}  // namespace gpu
}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_
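// -----------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of this header). It follows the
// protocol described above: the first call routes scratch allocation through
// a ScratchAllocator so cudnn can size its own workspace, and later calls for
// the same conv reuse a preallocated buffer of the observed size. The helper
// names below -- `MyScratchAllocator`, `TotalAllocatedBytes()`, and
// `AllocateDeviceBuffer()` -- are assumptions for the example, not real
// StreamExecutor APIs.
//
//   // First run: let cudnn allocate scratch via the allocator overload, and
//   // record how much memory it actually used.
//   MyScratchAllocator scratch_allocator(device_ordinal, memory_allocator);
//   TF_RETURN_IF_ERROR(RunCudnnConv(conv, operand_buffers, result_buffer,
//                                   &scratch_allocator, stream));
//   int64 scratch_bytes = scratch_allocator.TotalAllocatedBytes();
//
//   // Subsequent runs of the same conv: pass a preallocated scratch buffer
//   // of that size to the other overload.
//   se::DeviceMemoryBase scratch_buf = AllocateDeviceBuffer(scratch_bytes);
//   TF_RETURN_IF_ERROR(RunCudnnConv(conv, operand_buffers, result_buffer,
//                                   scratch_buf, stream));
// -----------------------------------------------------------------------------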