1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/xla/service/executable.h"
17 
18 #include "absl/memory/memory.h"
19 #include "absl/strings/str_format.h"
20 #include "tensorflow/compiler/xla/debug_options_flags.h"
21 #include "tensorflow/compiler/xla/service/dump.h"
22 #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
23 #include "tensorflow/compiler/xla/status.h"
24 #include "tensorflow/compiler/xla/status_macros.h"
25 #include "tensorflow/core/lib/hash/hash.h"
26 #include "tensorflow/core/lib/io/path.h"
27 #include "tensorflow/core/lib/strings/proto_serialization.h"
28 #include "tensorflow/core/platform/env.h"
29 
30 namespace xla {
31 
ExecuteOnStreams(absl::Span<const ServiceExecutableRunOptions> run_options,absl::Span<const absl::Span<const ShapedBuffer * const>> arguments)32 StatusOr<std::vector<ScopedShapedBuffer>> Executable::ExecuteOnStreams(
33     absl::Span<const ServiceExecutableRunOptions> run_options,
34     absl::Span<const absl::Span<const ShapedBuffer* const>> arguments) {
35   TF_RET_CHECK(run_options.size() == arguments.size());
36 
37   std::vector<ScopedShapedBuffer> return_values;
38   return_values.reserve(run_options.size());
39 
40   if (run_options.size() == 1) {
41     TF_ASSIGN_OR_RETURN(auto rv,
42                         ExecuteOnStream(&run_options[0], arguments[0],
43                                         /*hlo_execution_profile=*/nullptr));
44     return_values.push_back(std::move(rv));
45     return std::move(return_values);
46   }
47 
48   for (size_t i = 0; i < run_options.size(); ++i) {
49     // We cannot BlockHostUntilDone() on the already-launched executions in case
50     // of error, since if the executions communicate, the initially launched
51     // executions may never complete if not all executions are running.
52     TF_ASSIGN_OR_RETURN(auto rv,
53                         ExecuteAsyncOnStream(&run_options[i], arguments[i]));
54     return_values.push_back(std::move(rv));
55   }
56   for (const auto& options : run_options) {
57     TF_RET_CHECK(options.stream() != nullptr);
58     TF_RETURN_IF_ERROR(options.stream()->BlockHostUntilDone());
59   }
60   return std::move(return_values);
61 }
62 
ExecuteOnStreamWrapper(const ServiceExecutableRunOptions * run_options,ExecutionProfile * profile,absl::Span<const ShapedBuffer * const> arguments)63 StatusOr<ScopedShapedBuffer> Executable::ExecuteOnStreamWrapper(
64     const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
65     absl::Span<const ShapedBuffer* const> arguments) {
66   se::Stream* stream = run_options->stream();
67   std::unique_ptr<se::Timer> timer;
68   if (profile != nullptr) {
69     timer.reset(new se::Timer(stream->parent()));
70     stream->InitTimer(timer.get()).ThenStartTimer(timer.get());
71   }
72 
73   VLOG(1) << "enqueueing executable on stream...";
74   // If the profiling flag isn't enabled, we pass nullptr as the profile to
75   // indicate profiling is not requested.
76   std::unique_ptr<HloExecutionProfile> profile_ptr =
77       module_config().debug_options().xla_hlo_profile() &&
78               hlo_profiling_enabled()
79           ? absl::make_unique<HloExecutionProfile>(&hlo_profile_printer_data(),
80                                                    &hlo_profile_index_map())
81           : nullptr;
82 
83   StatusOr<ScopedShapedBuffer> return_value =
84       ExecuteOnStream(run_options, arguments, profile_ptr.get());
85   if (!return_value.status().ok()) {
86     if (profile != nullptr) {
87       // Ensure the ThenStartTimer call has completed before we destroy timer.
88       // We already have a failure status to return, so just log this if it
89       // fails.
90       Status status = stream->BlockHostUntilDone();
91       if (!status.ok()) {
92         LOG(ERROR) << "Failed to BlockHostUntilDone: " << status;
93       }
94     }
95     return return_value.status();
96   }
97 
98   if (profile != nullptr) {
99     VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
100     stream->ThenStopTimer(timer.get());
101     TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
102     VLOG(1) << "done with block-host-until-done";
103 
104     // Merge in run-time profile information from execution_profile.
105     //
106     // TODO(b/71713097): This is buggy -- even though the mutex takes care of
107     // C++ level races, some other concurrent ExecuteOnStreamWrapper call could
108     // have rewritten the execution_profile before we get to it.
109     profile->MergeFrom(execution_profile());
110 
111     // Overall execution time (in nanoseconds) from the executor timer.
112     if (stream->ok()) {
113       // Don't read timer->Nanoseconds() if the stream isn't OK -- that's
114       // illegal.
115       profile->set_compute_and_transfer_time_ns(timer->Nanoseconds());
116     }
117 
118     // TODO(b/28123297): On GPU we end up including transfer time in
119     // the compute time this way. Instead, we should get the correct
120     // value by measuring it. Setting the field here at least lets
121     // benchmarks provide *some* value for GPU computations.
122     //
123     // TODO(b/28447609): The value in compute_and_transfer_time_ns is actually
124     // the compute time without the transfer time, so this way we get the
125     // correct compute time. We should instead have the correct value for
126     // compute_and_transfer_time and set compute_time to the compute time.
127     if (profile->compute_time_ns() == 0) {
128       profile->set_compute_time_ns(profile->compute_and_transfer_time_ns());
129     }
130 
131     const int64 executable_size_in_bytes = SizeInBytes();
132     if (executable_size_in_bytes != 0) {
133       profile->set_executable_size_in_bytes(executable_size_in_bytes);
134     }
135   }
136 
137   if (profile_ptr != nullptr) {
138     XLA_LOG_LINES(
139         tensorflow::INFO,
140         profile_ptr->ToString(stream->parent()->GetDeviceDescription()));
141   }
142 
143   return return_value;
144 }
145 
SizeInBytes()146 int64 Executable::SizeInBytes() { return -1; }
147 
148 }  // namespace xla
149