1 /* Copyright 2016 The TensorFlow Authors All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/profiler/internal/tfprof_stats.h"
17 
18 #include <stdio.h>
19 #include <utility>
20 
21 #include "tensorflow/core/framework/step_stats.pb.h"
22 #include "tensorflow/core/lib/strings/str_util.h"
23 #include "tensorflow/core/platform/regexp.h"
24 #include "tensorflow/core/profiler/internal/tfprof_timeline.h"
25 
26 namespace tensorflow {
27 namespace tfprof {
28 namespace {
29 
30 const char* const kProfilePrefix = "Profile:\n";
31 
CreateRunMetadataNode(const string & name,NodeDef * def)32 bool CreateRunMetadataNode(const string& name, NodeDef* def) {
33   // TODO(xpan): Better solution than blacklisting this 2 nodes. They
34   // actually cost some resources, maybe include them. Some nodes, such
35   // as _SOURCE appear in multiple devices, which breaks tfprof's assumption.
36   if (name == "RecvTensor" || name == "_SOURCE" ||
37       name.find("MEMCPY") != name.npos) {
38     return false;
39   }
40   def->set_name(name);
41   // TODO(xpan): Better operation type.
42   // This is because some times a node doesn't have a op type,
43   // so we use node name as the op type.
44   def->set_op(name);
45   return true;
46 }
47 }  // namespace
48 
TFStats(std::unique_ptr<GraphDef> graph,std::unique_ptr<RunMetadata> run_meta,std::unique_ptr<OpLogProto> op_log,std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)49 TFStats::TFStats(std::unique_ptr<GraphDef> graph,
50                  std::unique_ptr<RunMetadata> run_meta,
51                  std::unique_ptr<OpLogProto> op_log,
52                  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
53     : has_code_traces_(false),
54       miss_accelerator_stream_(false),
55       ckpt_reader_(std::move(ckpt_reader)) {
56   CHECK(graph) << "Must at least have GraphDef";
57 
58   printf("Parsing Inputs...\n");
59   AddGraph(std::move(graph));
60   if (run_meta && run_meta->has_step_stats()) {
61     AddRunMeta(0, std::move(run_meta));
62   }
63   AddOpLogProto(std::move(op_log));
64 
65   if (ckpt_reader_) {
66     for (const auto& v : ckpt_reader_->GetVariableToShapeMap()) {
67       auto node = nodes_map_.find(v.first);
68       if (node != nodes_map_.end()) {
69         node->second->AddOpType("_checkpoint_variables");
70       }
71     }
72   }
73 }
74 
TFStats(const string & filename,std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)75 TFStats::TFStats(const string& filename,
76                  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
77     : has_code_traces_(false),
78       miss_accelerator_stream_(false),
79       ckpt_reader_(std::move(ckpt_reader)) {
80   string str;
81   Status s = ReadFileToString(Env::Default(), filename, &str);
82   if (!s.ok()) {
83     fprintf(stderr, "Failed to read profile: %s", s.ToString().c_str());
84     return;
85   }
86 
87   ProfileProto profile;
88   if (!profile.ParseFromString(str)) {
89     fprintf(stderr, "Failed to parse profile\n");
90     return;
91   }
92   for (const auto& entry : profile.id_to_string()) {
93     id_to_string_[entry.first] = entry.second;
94   }
95   for (const auto& node_pb : profile.nodes()) {
96     std::unique_ptr<TFGraphNode> node(
97         new TFGraphNode(node_pb.second, profile, &id_to_string_, &nodes_map_));
98     nodes_map_.insert(std::pair<string, std::unique_ptr<TFGraphNode>>(
99         node_pb.second.name(), std::move(node)));
100   }
101   has_code_traces_ = profile.has_trace();
102   for (int64 s : profile.steps()) {
103     steps_.insert(s);
104   }
105 }
106 
BuildView(const string & cmd)107 void TFStats::BuildView(const string& cmd) {
108   if (cmd == kCmds[0] && !scope_view_) {
109     scope_view_.reset(new TFScope(ckpt_reader_.get()));
110     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
111       scope_view_->AddNode(it->second.get());
112     }
113     scope_view_->Build();
114   }
115   if (cmd == kCmds[1] && !graph_view_) {
116     graph_view_.reset(new TFGraph(ckpt_reader_.get()));
117     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
118       graph_view_->AddNode(it->second.get());
119     }
120     graph_view_->Build();
121   }
122   if (cmd == kCmds[2] && !code_view_) {
123     code_view_.reset(new TFCode());
124     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
125       code_view_->AddNode(it->second.get());
126     }
127     code_view_->Build();
128   }
129   if (cmd == kCmds[3] && !op_view_) {
130     op_view_.reset(new TFOp());
131     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
132       op_view_->AddNode(it->second.get());
133     }
134     op_view_->Build();
135   }
136 }
137 
BuildAllViews()138 void TFStats::BuildAllViews() {
139   std::vector<string> cmds_str(kCmds, kCmds + sizeof(kCmds) / sizeof(*kCmds));
140   for (const string& cmd : cmds_str) {
141     BuildView(cmd);
142   }
143 }
144 
ShowGraphNode(const string & cmd,const Options & opts) const145 const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd,
146                                              const Options& opts) const {
147   if (!Validate(opts)) {
148     return empty_graph_node_;
149   }
150   string prefix = MaybeReportMissingTrace();
151   prefix += QueryDoc(cmd, opts) + kProfilePrefix;
152 
153   if (cmd == kCmds[0]) {
154     return scope_view_->Show(prefix, opts);
155   } else if (cmd == kCmds[1]) {
156     if (opts.step < 0 && opts.output_type == kOutput[0]) {
157       for (int64 step : steps_) {
158         Options nopts = opts;
159         nopts.step = step;
160         graph_view_->Show(prefix, nopts);
161       }
162       return empty_graph_node_;
163     }
164     return graph_view_->Show(prefix, opts);
165   } else {
166     fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
167     return empty_graph_node_;
168   }
169 }
170 
ShowMultiGraphNode(const string & cmd,const Options & opts) const171 const MultiGraphNodeProto& TFStats::ShowMultiGraphNode(
172     const string& cmd, const Options& opts) const {
173   if (!Validate(opts)) {
174     return empty_multi_graph_node_;
175   }
176   string prefix = MaybeReportMissingTrace();
177   prefix += QueryDoc(cmd, opts) + kProfilePrefix;
178 
179   if (cmd == kCmds[2]) {
180     if (!has_code_traces()) {
181       fprintf(stderr, "No code trace information\n");
182       return empty_multi_graph_node_;
183     }
184     return code_view_->Show(prefix, opts);
185   } else if (cmd == kCmds[3]) {
186     return op_view_->Show(prefix, opts);
187   } else {
188     fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
189     return empty_multi_graph_node_;
190   }
191 }
192 
AddGraph(std::unique_ptr<GraphDef> graph)193 void TFStats::AddGraph(std::unique_ptr<GraphDef> graph) {
194   std::map<string, const NodeDef*> node_defs;
195   bool node_added = false;
196   for (const NodeDef& node : graph->node()) {
197     if (nodes_map_.find(node.name()) != nodes_map_.end()) {
198       continue;
199     }
200     node_added = true;
201     nodes_map_[node.name()] = std::unique_ptr<TFGraphNode>(
202         new TFGraphNode(&node, nodes_map_.size(), &nodes_map_));
203     node_defs[node.name()] = &node;
204   }
205   for (auto it = node_defs.begin(); it != node_defs.end(); it++) {
206     TFGraphNode* node = nodes_map_.at(it->first).get();
207     for (int i = 0; i < it->second->input_size(); ++i) {
208       string node_input = it->second->input(i);
209       int output_idx = 0;
210       // input name format can be: "^node:src_output"
211       // if not :src_output, then it's the first one (further verify?)
212       auto prefix_pos = node_input.find(":");
213       if (prefix_pos != node_input.npos) {
214         std::vector<string> input_parts = str_util::Split(node_input, ":");
215         CHECK(input_parts.size() == 2)
216             << "Unknown NodeDef.input format: " << node_input;
217         node_input = input_parts[0];
218         CHECK(strings::safe_strto32(input_parts[1], &output_idx))
219             << "Failed to parse integer: " << output_idx;
220       }
221       if (node_input.substr(0, 1) == "^") {
222         node_input = node_input.substr(1);
223       }
224       // Delay input TFGraphNode retrieval as late as possible.
225       // In long run, when we have TensorFlow runtime graph, the
226       // graph connection should be dynamic and per-step.
227       node->AddInput(node_input, output_idx, i);
228     }
229   }
230   if (node_added) {
231     graph_view_.reset(nullptr);
232     scope_view_.reset(nullptr);
233     op_view_.reset(nullptr);
234     code_view_.reset(nullptr);
235   }
236 }
237 
AddOpLogProto(std::unique_ptr<OpLogProto> op_log)238 void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) {
239   if (!op_log) {
240     return;
241   }
242   for (const auto& entry : op_log->id_to_string()) {
243     if (id_to_string_.find(entry.first) == id_to_string_.end()) {
244       id_to_string_[entry.first] = entry.second;
245     }
246   }
247   for (const OpLogEntry& entry : op_log->log_entries()) {
248     auto node = nodes_map_.find(entry.name());
249     if (node == nodes_map_.end()) continue;
250     for (const string& type : entry.types()) {
251       node->second->AddOpType(type);
252     }
253     if (entry.float_ops()) {
254       node->second->AddFloatOps(entry.float_ops());
255     }
256     if (entry.has_code_def()) {
257       has_code_traces_ = true;
258       node->second->AddCode(entry.code_def(), &id_to_string_);
259     }
260   }
261 }
262 
AddRunMeta(int64 step,std::unique_ptr<RunMetadata> run_meta)263 void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) {
264   if (!run_meta || !run_meta->has_step_stats()) {
265     fprintf(stderr, "Invalid RunMetadata for step %lld\n", step);
266     return;
267   }
268   if (steps_.find(step) == steps_.end()) {
269     steps_.insert(step);
270   }
271   steps_.insert(step);
272 
273   bool has_gpu_scheduling = false;
274   bool has_gpu_stream = false;
275 
276   for (const auto& dev_stat : run_meta->step_stats().dev_stats()) {
277     string dev = str_util::Lowercase(dev_stat.device());
278     if (IsPlacedOnAccelerator(dev)) {
279       has_gpu_scheduling = true;
280       if (CountAsAcceleratorTime(dev)) {
281         has_gpu_stream = true;
282       }
283     }
284     for (const NodeExecStats& node_stat : dev_stat.node_stats()) {
285       string name = node_stat.node_name();
286       // Sometimes the node_name is suffixed with unnecessary information.
287       auto split_pos = node_stat.node_name().find(":");
288       if (split_pos != node_stat.node_name().npos) {
289         name = node_stat.node_name().substr(0, split_pos);
290       }
291       auto node = nodes_map_.find(name);
292       if (node == nodes_map_.end()) {
293         NodeDef def;
294         if (CreateRunMetadataNode(name, &def)) {
295           nodes_map_[name] = std::unique_ptr<TFGraphNode>(
296               new TFGraphNode(&def, nodes_map_.size(), &nodes_map_));
297           nodes_map_.at(name)->AddStepStat(step, dev_stat.device(), node_stat);
298         }
299       } else {
300         covered_nodes_.insert(node->second->id());
301         node->second->AddStepStat(step, dev_stat.device(), node_stat);
302       }
303     }
304   }
305 
306   if (has_gpu_scheduling && !has_gpu_stream) {
307     miss_accelerator_stream_ = true;
308   }
309 }
310 
MaybeReportMissingTrace() const311 string TFStats::MaybeReportMissingTrace() const {
312   string report = "";
313   if (miss_accelerator_stream_) {
314     report +=
315         "\n\nFound accelerator operation but misses accelerator "
316         "stream stats!\n\n"
317         "It's likely a gpu tracing issue rather than tf-profiler issue.\n"
318         "If you found your operation missing accelerator time, "
319         "consider filing a bug to xprof-dev@!\n\n";
320   }
321   return report;
322 }
323 
SerializeToString(string * content)324 void TFStats::SerializeToString(string* content) {
325   ProfileProto profile;
326   for (const auto& entry : id_to_string_) {
327     (*profile.mutable_id_to_string())[entry.first] = entry.second;
328   }
329   for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
330     if (it->second->id() < 0) {
331       continue;
332     }
333     (*profile.mutable_nodes())[it->second->id()].MergeFrom(
334         it->second->ToProto(nodes_map_));
335   }
336 
337   profile.set_has_trace(has_code_traces_);
338   profile.set_miss_accelerator_stream(miss_accelerator_stream_);
339   for (int64 s : steps_) {
340     profile.add_steps(s);
341   }
342   *content = profile.SerializeAsString();
343 }
344 
WriteProfile(const string & filename)345 void TFStats::WriteProfile(const string& filename) {
346   string content;
347   SerializeToString(&content);
348   Status s = WriteStringToFile(Env::Default(), filename, content);
349   if (!s.ok()) {
350     fprintf(stderr, "%s\n", s.ToString().c_str());
351   }
352 }
353 
Validate(const Options & opts) const354 bool TFStats::Validate(const Options& opts) const {
355   if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) {
356     fprintf(stderr,
357             "Options -step=%lld not found.\nAvailable steps: ", opts.step);
358     for (int64 s : steps_) {
359       fprintf(stderr, "%lld ", s);
360     }
361     fprintf(stderr, "\n");
362     return false;
363   }
364   return true;
365 }
366 
AddNodeForTest(int64 step,std::unique_ptr<TFGraphNode> node)367 void TFStats::AddNodeForTest(int64 step, std::unique_ptr<TFGraphNode> node) {
368   steps_.insert(step);
369   nodes_map_[node->name()] = std::move(node);
370 }
371 }  // namespace tfprof
372 }  // namespace tensorflow
373