/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/kernels/gpu_utils.h"

#if GOOGLE_CUDA

#include <algorithm>
#include <tuple>

#include "google/protobuf/any.pb.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/platform/logger.h"
#include "tensorflow/core/protobuf/autotuning.pb.h"
#include "tensorflow/core/protobuf/conv_autotuning.pb.h"
#include "tensorflow/core/util/proto/proto_utils.h"

namespace tensorflow {
namespace {

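// Returns the cuDNN version reported by the stream executor's DNN support as
// a CudnnVersion proto. The fields are left at their defaults if the version
// cannot be queried.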
tensorflow::CudnnVersion GetCudnnVersion(se::StreamExecutor* stream_executor) {
  tensorflow::CudnnVersion cudnn_version;
  if (auto* dnn = stream_executor->AsDnn()) {
    se::port::StatusOr<se::dnn::VersionInfo> version_or = dnn->GetVersion();
    if (version_or.ok()) {
      const auto& version = version_or.ValueOrDie();
      cudnn_version.set_major(version.major_version());
      cudnn_version.set_minor(version.minor_version());
      cudnn_version.set_patch(version.patch());
    }
  }
  return cudnn_version;
}

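// Returns the CUDA compute capability (major.minor) of the device backing
// `stream_executor` as a ComputeCapability proto.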
tensorflow::ComputeCapability GetComputeCapability(
    se::StreamExecutor* stream_executor) {
  tensorflow::ComputeCapability cc;
  int cc_major, cc_minor;
  stream_executor->GetDeviceDescription().cuda_compute_capability(&cc_major,
                                                                   &cc_minor);
  cc.set_major(cc_major);
  cc.set_minor(cc_minor);
  return cc;
}

}  // namespace

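// Logs one convolution autotuning session: packs the convolution NodeDef and
// the shapes/dtypes of its input, filter, and output tensors into an
// AutotuningLog, together with the cuDNN version, the device compute
// capability, and the per-algorithm results, then hands it to the logger.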
void LogConvAutotuneResults(const NodeDef& node, const Tensor& input,
                            const Tensor& filter, const Tensor& output,
                            se::StreamExecutor* stream_exec,
                            absl::Span<const AutotuneResult> results) {
  AutotuningLog log;
  ConvNodeDef instr;
  *instr.mutable_conv() = node;
  input.shape().AsProto(instr.mutable_input()->mutable_tensor_shape());
  instr.mutable_input()->set_dtype(input.dtype());
  filter.shape().AsProto(instr.mutable_filter()->mutable_tensor_shape());
  instr.mutable_filter()->set_dtype(filter.dtype());
  output.shape().AsProto(instr.mutable_output()->mutable_tensor_shape());
  instr.mutable_output()->set_dtype(output.dtype());
  log.mutable_instr()->PackFrom(std::move(instr));
  *log.mutable_cudnn_version() = GetCudnnVersion(stream_exec);
  *log.mutable_compute_capability() = GetComputeCapability(stream_exec);
  for (const auto& result : results) {
    *log.add_results() = result;
  }
  Logger::Singleton()->LogProto(log);
}

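// Same as LogConvAutotuneResults, but for fused convolutions: additionally
// records the bias tensor and, when present, the side input tensor.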
void LogFusedConvAutotuneResults(const NodeDef& node, const Tensor& input,
                                 const Tensor& filter, const Tensor& output,
                                 const Tensor& bias, const Tensor* side_input,
                                 se::StreamExecutor* stream_exec,
                                 absl::Span<const AutotuneResult> results) {
  AutotuningLog log;
  ConvNodeDef instr;
  *instr.mutable_conv() = node;
  input.shape().AsProto(instr.mutable_input()->mutable_tensor_shape());
  instr.mutable_input()->set_dtype(input.dtype());
  filter.shape().AsProto(instr.mutable_filter()->mutable_tensor_shape());
  instr.mutable_filter()->set_dtype(filter.dtype());
  output.shape().AsProto(instr.mutable_output()->mutable_tensor_shape());
  instr.mutable_output()->set_dtype(output.dtype());
  bias.shape().AsProto(instr.mutable_bias()->mutable_tensor_shape());
  instr.mutable_bias()->set_dtype(bias.dtype());
  if (side_input) {
    side_input->shape().AsProto(
        instr.mutable_side_input()->mutable_tensor_shape());
    instr.mutable_side_input()->set_dtype(side_input->dtype());
  }
  log.mutable_instr()->PackFrom(std::move(instr));
  *log.mutable_cudnn_version() = GetCudnnVersion(stream_exec);
  *log.mutable_compute_capability() = GetComputeCapability(stream_exec);
  for (const auto& result : results) {
    *log.add_results() = result;
  }
  Logger::Singleton()->LogProto(log);
}

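// Selects the fastest successful algorithm in `results` and stores it in
// `algo`. If some successful result uses no scratch space, the fastest such
// result is additionally recorded as the no-scratch algorithm. Returns
// NotFound when no result succeeded.
//
// A minimal usage sketch (assuming the caller has already filled `results`
// from its own autotuning loop):
//
//   se::dnn::AlgorithmConfig algo_config;
//   TF_RETURN_IF_ERROR(BestCudnnConvAlgorithm(results, &algo_config));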
Status BestCudnnConvAlgorithm(absl::Span<const AutotuneResult> results,
                              se::dnn::AlgorithmConfig* algo) {
  // The "!xhs.has_success()" terms below (for both lhs and rhs) make
  // successful results order first, since they need a smaller key for
  // "min_element".
  const AutotuneResult* best_result = std::min_element(
      results.begin(), results.end(),
      [](const AutotuneResult& lhs, const AutotuneResult& rhs) {
        return std::make_tuple(
                   !lhs.has_success(),
                   proto_utils::FromDurationProto(lhs.success().run_time())) <
               std::make_tuple(
                   !rhs.has_success(),
                   proto_utils::FromDurationProto(rhs.success().run_time()));
      });

  const AutotuneResult* best_result_no_scratch = std::min_element(
      results.begin(), results.end(),
      [](const AutotuneResult& lhs, const AutotuneResult& rhs) {
        return std::make_tuple(
                   !lhs.has_success(), lhs.success().scratch_bytes(),
                   proto_utils::FromDurationProto(lhs.success().run_time())) <
               std::make_tuple(
                   !rhs.has_success(), rhs.success().scratch_bytes(),
                   proto_utils::FromDurationProto(rhs.success().run_time()));
      });

  if (best_result == results.end() || !best_result->has_success()) {
    return errors::NotFound("No algorithm worked!");
  }
  algo->set_algorithm({best_result->conv().algorithm(),
                       best_result->conv().tensor_ops_enabled()});
  if (best_result_no_scratch != results.end() &&
      best_result_no_scratch->has_success() &&
      best_result_no_scratch->success().scratch_bytes() == 0) {
    algo->set_algorithm_no_scratch(
        {best_result_no_scratch->conv().algorithm(),
         best_result_no_scratch->conv().tensor_ops_enabled()});
  }
  return Status::OK();
}

}  // namespace tensorflow

#endif  // GOOGLE_CUDA