1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/profiler/lib/profiler_session.h"
17 #include <cstddef>
18 #include <string>
19 #include "tensorflow/core/common_runtime/eager/context.h"
20 #include "tensorflow/core/lib/core/error_codes.pb.h"
21 #include "tensorflow/core/platform/env.h"
22 #include "tensorflow/core/platform/mutex.h"
23 #include "tensorflow/core/platform/types.h"
24 #include "tensorflow/core/profiler/internal/gpu/tracer.h"
25 #include "tensorflow/core/profiler/internal/runtime/eager_profiler.h"
26 #include "tensorflow/core/profiler/trace_events.pb.h"
27 #include "tensorflow/core/protobuf/config.pb.h"
28 
29 namespace tensorflow {
30 
31 namespace {
32 
// Tracks whether there is an active ProfilerSession.
// Prevents another ProfilerSession from creating ProfilerInterface(s), as they
// use singletons that do not allow concurrent profiling requests (e.g.,
// DeviceTracer).
// Initialized directly rather than through ATOMIC_VAR_INIT, which is
// deprecated since C++20.
std::atomic<bool> session_active{false};
38 
AssignLanes(RunMetadata * run_metadata)39 void AssignLanes(RunMetadata* run_metadata) {
40   for (size_t device_id = 0;
41        device_id < run_metadata->step_stats().dev_stats_size(); ++device_id) {
42     auto* device_stats =
43         run_metadata->mutable_step_stats()->mutable_dev_stats(device_id);
44     if (device_stats->thread_names_size() > 0 ||
45         device_stats->node_stats_size() == 0) {
46       continue;
47     }
48     std::vector<uint64> lanes;
49     for (auto ns = device_stats->mutable_node_stats()->rbegin();
50          ns != device_stats->mutable_node_stats()->rend(); ns++) {
51       uint64 end_micros = ns->all_start_micros() + ns->all_end_rel_micros();
52       bool found_lane = false;
53       for (size_t l = 0; l < lanes.size(); l++) {
54         if (end_micros <= lanes[l]) {
55           ns->set_thread_id(l);
56           found_lane = true;
57           lanes[l] = ns->all_start_micros();
58           break;
59         }
60       }
61       if (!found_lane) {
62         ns->set_thread_id(lanes.size());
63         lanes.push_back(ns->all_start_micros());
64       }
65     }
66   }
67 }
68 
ConvertRunMetadataToTraceEvent(RunMetadata * run_metadata,profiler::Trace * trace,const uint64 profile_start_time_micros)69 void ConvertRunMetadataToTraceEvent(RunMetadata* run_metadata,
70                                     profiler::Trace* trace,
71                                     const uint64 profile_start_time_micros) {
72   AssignLanes(run_metadata);
73   auto trace_devices = trace->mutable_devices();
74 
75   for (size_t device_id = 0;
76        device_id < run_metadata->step_stats().dev_stats_size(); ++device_id) {
77     // Create device
78     auto* device_stats =
79         run_metadata->mutable_step_stats()->mutable_dev_stats(device_id);
80     profiler::Device device;
81     device.set_name(device_stats->device());
82     device.set_device_id(device_id);
83     profiler::Resource resource;
84     resource.set_name("0");
85     resource.set_resource_id(0);
86     (*device.mutable_resources())[0] = resource;
87     for (const auto& thread_name : device_stats->thread_names()) {
88       profiler::Resource resource;
89       resource.set_resource_id(thread_name.first);
90       resource.set_name(thread_name.second);
91       (*device.mutable_resources())[thread_name.first] = resource;
92     }
93     (*trace_devices)[device_id] = device;
94 
95     // Emit events.
96     for (auto node :
97          run_metadata->step_stats().dev_stats(device_id).node_stats()) {
98       if (node.all_start_micros() < profile_start_time_micros) {
99         continue;
100       }
101       auto* event = trace->add_trace_events();
102       auto* args = event->mutable_args();
103       event->set_device_id(device_id);
104       event->set_resource_id(node.thread_id());
105       event->set_name(node.node_name());
106       event->set_timestamp_ps(
107           (node.all_start_micros() - profile_start_time_micros) *
108           EnvTime::kMicrosToPicos);
109       event->set_duration_ps(node.all_end_rel_micros() *
110                              EnvTime::kMicrosToPicos);
111       (*args)["label"] = node.timeline_label();
112     }
113   }
114 
115   // TODO(fishx): Convert allocation data as well.
116 }
117 
118 }  // namespace
119 
Create(ProfilerContext * const context)120 /*static*/ std::unique_ptr<ProfilerSession> ProfilerSession::Create(
121     ProfilerContext* const context) {
122   return absl::WrapUnique(new ProfilerSession(context));
123 }
124 
Status()125 Status ProfilerSession::Status() {
126   mutex_lock l(mutex_);
127   return status_;
128 }
129 
SerializeToString(string * content)130 Status ProfilerSession::SerializeToString(string* content) {
131   mutex_lock l(mutex_);
132   if (!status_.ok()) return status_;
133   for (auto& profiler : profilers_) {
134     profiler->Stop().IgnoreError();
135   }
136   RunMetadata run_metadata;
137   for (auto& profiler : profilers_) {
138     profiler->CollectData(&run_metadata).IgnoreError();
139   }
140 
141   if (active_) {
142     // Allow another session to start.
143     session_active.store(false);
144     active_ = false;
145   }
146 
147   profiler::Trace trace;
148 
149   ConvertRunMetadataToTraceEvent(&run_metadata, &trace, start_time_micros_);
150 
151   trace.SerializeToString(content);
152   return Status::OK();
153 }
154 
ProfilerSession(ProfilerContext * const context)155 ProfilerSession::ProfilerSession(ProfilerContext* const context)
156     : active_(!session_active.exchange(true)),
157       start_time_micros_(Env::Default()->NowNanos() / EnvTime::kMicrosToNanos) {
158   if (!active_) {
159     status_ = tensorflow::Status(tensorflow::error::Code::UNAVAILABLE,
160                                  "Another profiling session is active.");
161     return;
162   }
163 
164   LOG(INFO) << "Profile Session started.";
165 
166   if (context->eager_context != nullptr) {
167     profilers_.push_back(tensorflow::profiler::runtime::EagerProfiler::Create(
168         context->eager_context));
169   }
170   profilers_.push_back(tensorflow::profiler::gpu::Tracer::Create());
171 
172   status_ = Status::OK();
173 
174   for (auto& profiler : profilers_) {
175     profiler->Start().IgnoreError();
176   }
177 }
178 
~ProfilerSession()179 ProfilerSession::~ProfilerSession() {
180   for (auto& profiler : profilers_) {
181     profiler->Stop().IgnoreError();
182   }
183 
184   if (active_) {
185     // Allow another session to start.
186     session_active.store(false);
187   }
188 }
189 
190 }  // namespace tensorflow
191