1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_
18 
#include "absl/types/optional.h"
#include "absl/types/span.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_instructions.h"
#include "tensorflow/compiler/xla/status.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
27 
28 namespace xla {
29 namespace gpu {
30 
// Options controlling how RunCudnnConv (declared below) executes a
// convolution.  All fields are optional; default-constructed options run the
// convolution as described by the instruction itself.
struct RunConvOptions {
  // Nullable output-parameter pointer for profiling results.  If non-null,
  // NOTE(review): presumably the run is profiled and results are written
  // here -- confirm against the implementation.
  se::dnn::ProfileResult* profile_result = nullptr;

  // Use this algorithm, instead of the one from the instruction.  When unset,
  // the algorithm recorded on the instruction is used.
  absl::optional<se::dnn::AlgorithmDesc> algo_override;
};
38 
// This file contains low-level routines for running cudnn convolutions.

// Calls into cudnn to run the specified convolution.
//
// We provide one overload which takes a scratch buffer, and another which takes
// an allocator which is responsible for allocating the scratch space.  In
// theory the second one shouldn't be necessary -- users of this function could
// just ask cudnn how much scratch space it needs for a particular convolution.
// But in practice, StreamExecutor does not expose such an API, and in the name
// of parsimony, perhaps it's better not to add it.  Instead, the first time you
// call a convolution, you should call the version that takes a scratch
// allocator and take note of how much memory is used.  The next time you call
// the same conv, you can provide an explicitly preallocated scratch buffer of
// that size, if you like.
//
// Arguments:
//   conv: the cudnn-convolution custom-call HLO that describes which
//     convolution to run.
//   operand_buffers: one device buffer per operand of `conv`.
//   result_buffer: device buffer that receives the convolution's output.
//   scratch_buf: preallocated cudnn workspace.  NOTE(review): presumably it
//     must be at least as large as the selected algorithm requires -- confirm
//     against the implementation before relying on a smaller buffer.
//   stream: the stream on which the convolution is enqueued.
//
// Returns a non-OK Status if the convolution could not be run; see
// RunConvOptions for profiling and algorithm-override knobs.
Status RunCudnnConv(const HloCustomCallInstruction* conv,
                    absl::Span<se::DeviceMemoryBase> operand_buffers,
                    se::DeviceMemoryBase result_buffer,
                    se::DeviceMemoryBase scratch_buf, se::Stream* stream,
                    RunConvOptions = {});
58 
// Overload of the above that obtains its cudnn workspace from
// `scratch_allocator` instead of taking a preallocated buffer.  Use this
// variant the first time you run a given convolution (e.g. to discover how
// much scratch memory it needs); otherwise identical to the overload above.
Status RunCudnnConv(const HloCustomCallInstruction* conv,
                    absl::Span<se::DeviceMemoryBase> operand_buffers,
                    se::DeviceMemoryBase result_buffer,
                    se::ScratchAllocator* scratch_allocator, se::Stream* stream,
                    RunConvOptions = {});
64 
65 }  // namespace gpu
66 }  // namespace xla
67 
68 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_
69