1 /* Copyright 2016 The TensorFlow Authors All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/profiler/internal/tfprof_stats.h"
17
18 #include <stdio.h>
19
20 #include <utility>
21
22 #include "absl/strings/numbers.h"
23 #include "absl/strings/str_format.h"
24 #include "absl/strings/str_split.h"
25 #include "tensorflow/core/framework/step_stats.pb.h"
26 #include "tensorflow/core/platform/regexp.h"
27 #include "tensorflow/core/profiler/internal/tfprof_timeline.h"
28
29 namespace tensorflow {
30 namespace tfprof {
31 namespace {
32
33 const char* const kProfilePrefix = "Profile:\n";
34
CreateRunMetadataNode(const string & name,NodeDef * def)35 bool CreateRunMetadataNode(const string& name, NodeDef* def) {
36 // TODO(xpan): Better solution than denylisting this 2 nodes. They
37 // actually cost some resources, maybe include them. Some nodes, such
38 // as _SOURCE appear in multiple devices, which breaks tfprof's assumption.
39 if (name == "RecvTensor" || name == "_SOURCE" ||
40 name.find("MEMCPY") != name.npos) {
41 return false;
42 }
43 def->set_name(name);
44 // TODO(xpan): Better operation type.
45 // This is because some times a node doesn't have a op type,
46 // so we use node name as the op type.
47 def->set_op(name);
48 return true;
49 }
50 } // namespace
51
TFStats(std::unique_ptr<GraphDef> graph,std::unique_ptr<RunMetadata> run_meta,std::unique_ptr<OpLogProto> op_log,std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)52 TFStats::TFStats(std::unique_ptr<GraphDef> graph,
53 std::unique_ptr<RunMetadata> run_meta,
54 std::unique_ptr<OpLogProto> op_log,
55 std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
56 : has_code_traces_(false),
57 miss_accelerator_stream_(false),
58 ckpt_reader_(std::move(ckpt_reader)) {
59 CHECK(graph) << "Must at least have GraphDef";
60
61 AddGraph(std::move(graph));
62 if (run_meta && run_meta->has_step_stats()) {
63 AddRunMeta(0, std::move(run_meta));
64 }
65 AddOpLogProto(std::move(op_log));
66
67 if (ckpt_reader_) {
68 for (const auto& v : ckpt_reader_->GetVariableToShapeMap()) {
69 auto node = nodes_map_.find(v.first);
70 if (node != nodes_map_.end()) {
71 node->second->AddOpType("_checkpoint_variables");
72 }
73 }
74 }
75 }
76
TFStats(const string & filename,std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)77 TFStats::TFStats(const string& filename,
78 std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
79 : has_code_traces_(false),
80 miss_accelerator_stream_(false),
81 ckpt_reader_(std::move(ckpt_reader)) {
82 string str;
83 Status s = ReadFileToString(Env::Default(), filename, &str);
84 if (!s.ok()) {
85 absl::FPrintF(stderr, "Failed to read profile: %s", s.ToString());
86 return;
87 }
88
89 ProfileProto profile;
90 if (!profile.ParseFromString(str)) {
91 absl::FPrintF(stderr, "Failed to parse profile\n");
92 return;
93 }
94 for (const auto& entry : profile.id_to_string()) {
95 id_to_string_[entry.first] = entry.second;
96 }
97 for (const auto& node_pb : profile.nodes()) {
98 std::unique_ptr<TFGraphNode> node(
99 new TFGraphNode(node_pb.second, profile, &id_to_string_, &nodes_map_));
100 nodes_map_.insert(std::pair<string, std::unique_ptr<TFGraphNode>>(
101 node_pb.second.name(), std::move(node)));
102 }
103 has_code_traces_ = profile.has_trace();
104 for (int64 s : profile.steps()) {
105 steps_.insert(s);
106 }
107 }
108
BuildView(const string & cmd)109 void TFStats::BuildView(const string& cmd) {
110 if (cmd == kCmds[0] && !scope_view_) {
111 scope_view_.reset(new TFScope(ckpt_reader_.get()));
112 for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
113 scope_view_->AddNode(it->second.get());
114 }
115 scope_view_->Build();
116 }
117 if (cmd == kCmds[1] && !graph_view_) {
118 graph_view_.reset(new TFGraph(ckpt_reader_.get()));
119 for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
120 graph_view_->AddNode(it->second.get());
121 }
122 graph_view_->Build();
123 }
124 if (cmd == kCmds[2] && !code_view_) {
125 code_view_.reset(new TFCode());
126 for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
127 code_view_->AddNode(it->second.get());
128 }
129 code_view_->Build();
130 }
131 if (cmd == kCmds[3] && !op_view_) {
132 op_view_.reset(new TFOp());
133 for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
134 op_view_->AddNode(it->second.get());
135 }
136 op_view_->Build();
137 }
138 }
139
BuildAllViews()140 void TFStats::BuildAllViews() {
141 std::vector<string> cmds_str(kCmds, kCmds + sizeof(kCmds) / sizeof(*kCmds));
142 for (const string& cmd : cmds_str) {
143 BuildView(cmd);
144 }
145 }
146
ShowGraphNode(const string & cmd,const Options & opts) const147 const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd,
148 const Options& opts) const {
149 if (!Validate(opts)) {
150 return empty_graph_node_;
151 }
152 string prefix = MaybeReportMissingTrace();
153 prefix += QueryDoc(cmd, opts) + kProfilePrefix;
154
155 if (cmd == kCmds[0]) {
156 return scope_view_->Show(prefix, opts);
157 } else if (cmd == kCmds[1]) {
158 if (opts.step < 0 && opts.output_type == kOutput[0]) {
159 for (int64 step : steps_) {
160 Options nopts = opts;
161 nopts.step = step;
162 graph_view_->Show(prefix, nopts);
163 }
164 return empty_graph_node_;
165 }
166 return graph_view_->Show(prefix, opts);
167 } else {
168 absl::FPrintF(stderr, "Unknown command: %s\n", cmd);
169 return empty_graph_node_;
170 }
171 }
172
ShowMultiGraphNode(const string & cmd,const Options & opts) const173 const MultiGraphNodeProto& TFStats::ShowMultiGraphNode(
174 const string& cmd, const Options& opts) const {
175 if (!Validate(opts)) {
176 return empty_multi_graph_node_;
177 }
178 string prefix = MaybeReportMissingTrace();
179 prefix += QueryDoc(cmd, opts) + kProfilePrefix;
180
181 if (cmd == kCmds[2]) {
182 if (!has_code_traces()) {
183 absl::FPrintF(stderr, "No code trace information\n");
184 return empty_multi_graph_node_;
185 }
186 return code_view_->Show(prefix, opts);
187 } else if (cmd == kCmds[3]) {
188 return op_view_->Show(prefix, opts);
189 } else {
190 absl::FPrintF(stderr, "Unknown command: %s\n", cmd);
191 return empty_multi_graph_node_;
192 }
193 }
194
AddGraph(std::unique_ptr<GraphDef> graph)195 void TFStats::AddGraph(std::unique_ptr<GraphDef> graph) {
196 std::map<string, const NodeDef*> node_defs;
197 bool node_added = false;
198 for (const NodeDef& node : graph->node()) {
199 if (nodes_map_.find(node.name()) != nodes_map_.end()) {
200 continue;
201 }
202 node_added = true;
203 size_t num_nodes = nodes_map_.size();
204 nodes_map_[node.name()] = std::unique_ptr<TFGraphNode>(
205 new TFGraphNode(&node, num_nodes, &nodes_map_));
206 node_defs[node.name()] = &node;
207 }
208 for (auto it = node_defs.begin(); it != node_defs.end(); it++) {
209 TFGraphNode* node = nodes_map_.at(it->first).get();
210 for (int i = 0; i < it->second->input_size(); ++i) {
211 string node_input = it->second->input(i);
212 int output_idx = 0;
213 // input name format can be: "^node:src_output"
214 // if not :src_output, then it's the first one (further verify?)
215 auto prefix_pos = node_input.find(':');
216 if (prefix_pos != node_input.npos) {
217 std::vector<string> input_parts = absl::StrSplit(node_input, ':');
218 DCHECK(input_parts.size() == 2)
219 << "Unknown NodeDef.input format: " << node_input;
220 node_input = input_parts[0];
221 DCHECK(absl::SimpleAtoi(input_parts[1], &output_idx))
222 << "Failed to parse integer: " << output_idx;
223 }
224 if (node_input.substr(0, 1) == "^") {
225 node_input = node_input.substr(1);
226 }
227 // Delay input TFGraphNode retrieval as late as possible.
228 // In long run, when we have TensorFlow runtime graph, the
229 // graph connection should be dynamic and per-step.
230 node->AddInput(node_input, output_idx, i);
231 }
232 }
233 if (node_added) {
234 graph_view_.reset(nullptr);
235 scope_view_.reset(nullptr);
236 op_view_.reset(nullptr);
237 code_view_.reset(nullptr);
238 }
239 }
240
AddOpLogProto(std::unique_ptr<OpLogProto> op_log)241 void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) {
242 if (!op_log) {
243 return;
244 }
245 for (const auto& entry : op_log->id_to_string()) {
246 if (id_to_string_.find(entry.first) == id_to_string_.end()) {
247 id_to_string_[entry.first] = entry.second;
248 }
249 }
250 for (const OpLogEntry& entry : op_log->log_entries()) {
251 auto node = nodes_map_.find(entry.name());
252 if (node == nodes_map_.end()) continue;
253 for (const string& type : entry.types()) {
254 node->second->AddOpType(type);
255 }
256 if (entry.float_ops()) {
257 node->second->AddFloatOps(entry.float_ops());
258 }
259 if (entry.has_code_def()) {
260 has_code_traces_ = true;
261 node->second->AddCode(entry.code_def(), &id_to_string_);
262 }
263 }
264 }
265
AddRunMeta(int64 step,std::unique_ptr<RunMetadata> run_meta)266 void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) {
267 if (!run_meta || !run_meta->has_step_stats()) {
268 absl::FPrintF(stderr, "Invalid RunMetadata for step %d\n", step);
269 return;
270 }
271 if (steps_.find(step) == steps_.end()) {
272 steps_.insert(step);
273 }
274 steps_.insert(step);
275
276 bool has_gpu_scheduling = false;
277 bool has_gpu_stream = false;
278
279 for (const auto& dev_stat : run_meta->step_stats().dev_stats()) {
280 string dev = absl::AsciiStrToLower(dev_stat.device());
281 if (IsPlacedOnAccelerator(dev)) {
282 has_gpu_scheduling = true;
283 if (CountAsAcceleratorTime(dev)) {
284 has_gpu_stream = true;
285 }
286 }
287 for (const NodeExecStats& node_stat : dev_stat.node_stats()) {
288 string name = node_stat.node_name();
289 // Sometimes the node_name is suffixed with unnecessary information.
290 auto split_pos = node_stat.node_name().find(':');
291 if (split_pos != node_stat.node_name().npos) {
292 name = node_stat.node_name().substr(0, split_pos);
293 }
294 auto node = nodes_map_.find(name);
295 if (node == nodes_map_.end()) {
296 NodeDef def;
297 if (CreateRunMetadataNode(name, &def)) {
298 size_t num_nodes = nodes_map_.size();
299 nodes_map_[name] = std::unique_ptr<TFGraphNode>(
300 new TFGraphNode(&def, num_nodes, &nodes_map_));
301 nodes_map_.at(name)->AddStepStat(step, dev_stat.device(), node_stat);
302 }
303 } else {
304 covered_nodes_.insert(node->second->id());
305 node->second->AddStepStat(step, dev_stat.device(), node_stat);
306 }
307 }
308 }
309
310 if (has_gpu_scheduling && !has_gpu_stream) {
311 miss_accelerator_stream_ = true;
312 }
313 }
314
MaybeReportMissingTrace() const315 string TFStats::MaybeReportMissingTrace() const {
316 string report = "";
317 if (miss_accelerator_stream_) {
318 report +=
319 "\n\nFound accelerator operation but misses accelerator "
320 "stream stats!\n\n"
321 "It's likely a gpu tracing issue rather than tf-profiler issue.\n"
322 "If you found your operation missing accelerator time, "
323 "consider to post to discuss@tensorflow.org!\n\n";
324 }
325 return report;
326 }
327
SerializeToString(string * content)328 void TFStats::SerializeToString(string* content) {
329 ProfileProto profile;
330 for (const auto& entry : id_to_string_) {
331 (*profile.mutable_id_to_string())[entry.first] = entry.second;
332 }
333 for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
334 if (it->second->id() < 0) {
335 continue;
336 }
337 (*profile.mutable_nodes())[it->second->id()].MergeFrom(
338 it->second->ToProto(nodes_map_));
339 }
340
341 profile.set_has_trace(has_code_traces_);
342 profile.set_miss_accelerator_stream(miss_accelerator_stream_);
343 for (int64 s : steps_) {
344 profile.add_steps(s);
345 }
346 *content = profile.SerializeAsString();
347 }
348
WriteProfile(const string & filename)349 void TFStats::WriteProfile(const string& filename) {
350 string content;
351 SerializeToString(&content);
352 Status s = WriteStringToFile(Env::Default(), filename, content);
353 if (!s.ok()) {
354 absl::FPrintF(stderr, "%s\n", s.ToString());
355 }
356 }
357
Validate(const Options & opts) const358 bool TFStats::Validate(const Options& opts) const {
359 if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) {
360 absl::FPrintF(stderr,
361 "Options -step=%d not found.\nAvailable steps: ", opts.step);
362 for (int64 s : steps_) {
363 absl::FPrintF(stderr, "%d ", s);
364 }
365 absl::FPrintF(stderr, "\n");
366 return false;
367 }
368 return true;
369 }
370
AddNodeForTest(int64 step,std::unique_ptr<TFGraphNode> node)371 void TFStats::AddNodeForTest(int64 step, std::unique_ptr<TFGraphNode> node) {
372 steps_.insert(step);
373 nodes_map_[node->name()] = std::move(node);
374 }
375 } // namespace tfprof
376 } // namespace tensorflow
377