1 /* Copyright 2016 The TensorFlow Authors All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/profiler/internal/tfprof_stats.h"
17 
18 #include <stdio.h>
19 
20 #include <utility>
21 
22 #include "absl/strings/numbers.h"
23 #include "absl/strings/str_format.h"
24 #include "absl/strings/str_split.h"
25 #include "tensorflow/core/framework/step_stats.pb.h"
26 #include "tensorflow/core/platform/regexp.h"
27 #include "tensorflow/core/profiler/internal/tfprof_timeline.h"
28 
29 namespace tensorflow {
30 namespace tfprof {
31 namespace {
32 
33 const char* const kProfilePrefix = "Profile:\n";
34 
CreateRunMetadataNode(const string & name,NodeDef * def)35 bool CreateRunMetadataNode(const string& name, NodeDef* def) {
36   // TODO(xpan): Better solution than denylisting this 2 nodes. They
37   // actually cost some resources, maybe include them. Some nodes, such
38   // as _SOURCE appear in multiple devices, which breaks tfprof's assumption.
39   if (name == "RecvTensor" || name == "_SOURCE" ||
40       name.find("MEMCPY") != name.npos) {
41     return false;
42   }
43   def->set_name(name);
44   // TODO(xpan): Better operation type.
45   // This is because some times a node doesn't have a op type,
46   // so we use node name as the op type.
47   def->set_op(name);
48   return true;
49 }
50 }  // namespace
51 
TFStats(std::unique_ptr<GraphDef> graph,std::unique_ptr<RunMetadata> run_meta,std::unique_ptr<OpLogProto> op_log,std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)52 TFStats::TFStats(std::unique_ptr<GraphDef> graph,
53                  std::unique_ptr<RunMetadata> run_meta,
54                  std::unique_ptr<OpLogProto> op_log,
55                  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
56     : has_code_traces_(false),
57       miss_accelerator_stream_(false),
58       ckpt_reader_(std::move(ckpt_reader)) {
59   CHECK(graph) << "Must at least have GraphDef";
60 
61   AddGraph(std::move(graph));
62   if (run_meta && run_meta->has_step_stats()) {
63     AddRunMeta(0, std::move(run_meta));
64   }
65   AddOpLogProto(std::move(op_log));
66 
67   if (ckpt_reader_) {
68     for (const auto& v : ckpt_reader_->GetVariableToShapeMap()) {
69       auto node = nodes_map_.find(v.first);
70       if (node != nodes_map_.end()) {
71         node->second->AddOpType("_checkpoint_variables");
72       }
73     }
74   }
75 }
76 
TFStats(const string & filename,std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)77 TFStats::TFStats(const string& filename,
78                  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
79     : has_code_traces_(false),
80       miss_accelerator_stream_(false),
81       ckpt_reader_(std::move(ckpt_reader)) {
82   string str;
83   Status s = ReadFileToString(Env::Default(), filename, &str);
84   if (!s.ok()) {
85     absl::FPrintF(stderr, "Failed to read profile: %s", s.ToString());
86     return;
87   }
88 
89   ProfileProto profile;
90   if (!profile.ParseFromString(str)) {
91     absl::FPrintF(stderr, "Failed to parse profile\n");
92     return;
93   }
94   for (const auto& entry : profile.id_to_string()) {
95     id_to_string_[entry.first] = entry.second;
96   }
97   for (const auto& node_pb : profile.nodes()) {
98     std::unique_ptr<TFGraphNode> node(
99         new TFGraphNode(node_pb.second, profile, &id_to_string_, &nodes_map_));
100     nodes_map_.insert(std::pair<string, std::unique_ptr<TFGraphNode>>(
101         node_pb.second.name(), std::move(node)));
102   }
103   has_code_traces_ = profile.has_trace();
104   for (int64 s : profile.steps()) {
105     steps_.insert(s);
106   }
107 }
108 
BuildView(const string & cmd)109 void TFStats::BuildView(const string& cmd) {
110   if (cmd == kCmds[0] && !scope_view_) {
111     scope_view_.reset(new TFScope(ckpt_reader_.get()));
112     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
113       scope_view_->AddNode(it->second.get());
114     }
115     scope_view_->Build();
116   }
117   if (cmd == kCmds[1] && !graph_view_) {
118     graph_view_.reset(new TFGraph(ckpt_reader_.get()));
119     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
120       graph_view_->AddNode(it->second.get());
121     }
122     graph_view_->Build();
123   }
124   if (cmd == kCmds[2] && !code_view_) {
125     code_view_.reset(new TFCode());
126     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
127       code_view_->AddNode(it->second.get());
128     }
129     code_view_->Build();
130   }
131   if (cmd == kCmds[3] && !op_view_) {
132     op_view_.reset(new TFOp());
133     for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
134       op_view_->AddNode(it->second.get());
135     }
136     op_view_->Build();
137   }
138 }
139 
BuildAllViews()140 void TFStats::BuildAllViews() {
141   std::vector<string> cmds_str(kCmds, kCmds + sizeof(kCmds) / sizeof(*kCmds));
142   for (const string& cmd : cmds_str) {
143     BuildView(cmd);
144   }
145 }
146 
ShowGraphNode(const string & cmd,const Options & opts) const147 const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd,
148                                              const Options& opts) const {
149   if (!Validate(opts)) {
150     return empty_graph_node_;
151   }
152   string prefix = MaybeReportMissingTrace();
153   prefix += QueryDoc(cmd, opts) + kProfilePrefix;
154 
155   if (cmd == kCmds[0]) {
156     return scope_view_->Show(prefix, opts);
157   } else if (cmd == kCmds[1]) {
158     if (opts.step < 0 && opts.output_type == kOutput[0]) {
159       for (int64 step : steps_) {
160         Options nopts = opts;
161         nopts.step = step;
162         graph_view_->Show(prefix, nopts);
163       }
164       return empty_graph_node_;
165     }
166     return graph_view_->Show(prefix, opts);
167   } else {
168     absl::FPrintF(stderr, "Unknown command: %s\n", cmd);
169     return empty_graph_node_;
170   }
171 }
172 
ShowMultiGraphNode(const string & cmd,const Options & opts) const173 const MultiGraphNodeProto& TFStats::ShowMultiGraphNode(
174     const string& cmd, const Options& opts) const {
175   if (!Validate(opts)) {
176     return empty_multi_graph_node_;
177   }
178   string prefix = MaybeReportMissingTrace();
179   prefix += QueryDoc(cmd, opts) + kProfilePrefix;
180 
181   if (cmd == kCmds[2]) {
182     if (!has_code_traces()) {
183       absl::FPrintF(stderr, "No code trace information\n");
184       return empty_multi_graph_node_;
185     }
186     return code_view_->Show(prefix, opts);
187   } else if (cmd == kCmds[3]) {
188     return op_view_->Show(prefix, opts);
189   } else {
190     absl::FPrintF(stderr, "Unknown command: %s\n", cmd);
191     return empty_multi_graph_node_;
192   }
193 }
194 
AddGraph(std::unique_ptr<GraphDef> graph)195 void TFStats::AddGraph(std::unique_ptr<GraphDef> graph) {
196   std::map<string, const NodeDef*> node_defs;
197   bool node_added = false;
198   for (const NodeDef& node : graph->node()) {
199     if (nodes_map_.find(node.name()) != nodes_map_.end()) {
200       continue;
201     }
202     node_added = true;
203     size_t num_nodes = nodes_map_.size();
204     nodes_map_[node.name()] = std::unique_ptr<TFGraphNode>(
205         new TFGraphNode(&node, num_nodes, &nodes_map_));
206     node_defs[node.name()] = &node;
207   }
208   for (auto it = node_defs.begin(); it != node_defs.end(); it++) {
209     TFGraphNode* node = nodes_map_.at(it->first).get();
210     for (int i = 0; i < it->second->input_size(); ++i) {
211       string node_input = it->second->input(i);
212       int output_idx = 0;
213       // input name format can be: "^node:src_output"
214       // if not :src_output, then it's the first one (further verify?)
215       auto prefix_pos = node_input.find(':');
216       if (prefix_pos != node_input.npos) {
217         std::vector<string> input_parts = absl::StrSplit(node_input, ':');
218         DCHECK(input_parts.size() == 2)
219             << "Unknown NodeDef.input format: " << node_input;
220         node_input = input_parts[0];
221         DCHECK(absl::SimpleAtoi(input_parts[1], &output_idx))
222             << "Failed to parse integer: " << output_idx;
223       }
224       if (node_input.substr(0, 1) == "^") {
225         node_input = node_input.substr(1);
226       }
227       // Delay input TFGraphNode retrieval as late as possible.
228       // In long run, when we have TensorFlow runtime graph, the
229       // graph connection should be dynamic and per-step.
230       node->AddInput(node_input, output_idx, i);
231     }
232   }
233   if (node_added) {
234     graph_view_.reset(nullptr);
235     scope_view_.reset(nullptr);
236     op_view_.reset(nullptr);
237     code_view_.reset(nullptr);
238   }
239 }
240 
AddOpLogProto(std::unique_ptr<OpLogProto> op_log)241 void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) {
242   if (!op_log) {
243     return;
244   }
245   for (const auto& entry : op_log->id_to_string()) {
246     if (id_to_string_.find(entry.first) == id_to_string_.end()) {
247       id_to_string_[entry.first] = entry.second;
248     }
249   }
250   for (const OpLogEntry& entry : op_log->log_entries()) {
251     auto node = nodes_map_.find(entry.name());
252     if (node == nodes_map_.end()) continue;
253     for (const string& type : entry.types()) {
254       node->second->AddOpType(type);
255     }
256     if (entry.float_ops()) {
257       node->second->AddFloatOps(entry.float_ops());
258     }
259     if (entry.has_code_def()) {
260       has_code_traces_ = true;
261       node->second->AddCode(entry.code_def(), &id_to_string_);
262     }
263   }
264 }
265 
AddRunMeta(int64 step,std::unique_ptr<RunMetadata> run_meta)266 void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) {
267   if (!run_meta || !run_meta->has_step_stats()) {
268     absl::FPrintF(stderr, "Invalid RunMetadata for step %d\n", step);
269     return;
270   }
271   if (steps_.find(step) == steps_.end()) {
272     steps_.insert(step);
273   }
274   steps_.insert(step);
275 
276   bool has_gpu_scheduling = false;
277   bool has_gpu_stream = false;
278 
279   for (const auto& dev_stat : run_meta->step_stats().dev_stats()) {
280     string dev = absl::AsciiStrToLower(dev_stat.device());
281     if (IsPlacedOnAccelerator(dev)) {
282       has_gpu_scheduling = true;
283       if (CountAsAcceleratorTime(dev)) {
284         has_gpu_stream = true;
285       }
286     }
287     for (const NodeExecStats& node_stat : dev_stat.node_stats()) {
288       string name = node_stat.node_name();
289       // Sometimes the node_name is suffixed with unnecessary information.
290       auto split_pos = node_stat.node_name().find(':');
291       if (split_pos != node_stat.node_name().npos) {
292         name = node_stat.node_name().substr(0, split_pos);
293       }
294       auto node = nodes_map_.find(name);
295       if (node == nodes_map_.end()) {
296         NodeDef def;
297         if (CreateRunMetadataNode(name, &def)) {
298           size_t num_nodes = nodes_map_.size();
299           nodes_map_[name] = std::unique_ptr<TFGraphNode>(
300               new TFGraphNode(&def, num_nodes, &nodes_map_));
301           nodes_map_.at(name)->AddStepStat(step, dev_stat.device(), node_stat);
302         }
303       } else {
304         covered_nodes_.insert(node->second->id());
305         node->second->AddStepStat(step, dev_stat.device(), node_stat);
306       }
307     }
308   }
309 
310   if (has_gpu_scheduling && !has_gpu_stream) {
311     miss_accelerator_stream_ = true;
312   }
313 }
314 
MaybeReportMissingTrace() const315 string TFStats::MaybeReportMissingTrace() const {
316   string report = "";
317   if (miss_accelerator_stream_) {
318     report +=
319         "\n\nFound accelerator operation but misses accelerator "
320         "stream stats!\n\n"
321         "It's likely a gpu tracing issue rather than tf-profiler issue.\n"
322         "If you found your operation missing accelerator time, "
323         "consider to post to discuss@tensorflow.org!\n\n";
324   }
325   return report;
326 }
327 
SerializeToString(string * content)328 void TFStats::SerializeToString(string* content) {
329   ProfileProto profile;
330   for (const auto& entry : id_to_string_) {
331     (*profile.mutable_id_to_string())[entry.first] = entry.second;
332   }
333   for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
334     if (it->second->id() < 0) {
335       continue;
336     }
337     (*profile.mutable_nodes())[it->second->id()].MergeFrom(
338         it->second->ToProto(nodes_map_));
339   }
340 
341   profile.set_has_trace(has_code_traces_);
342   profile.set_miss_accelerator_stream(miss_accelerator_stream_);
343   for (int64 s : steps_) {
344     profile.add_steps(s);
345   }
346   *content = profile.SerializeAsString();
347 }
348 
WriteProfile(const string & filename)349 void TFStats::WriteProfile(const string& filename) {
350   string content;
351   SerializeToString(&content);
352   Status s = WriteStringToFile(Env::Default(), filename, content);
353   if (!s.ok()) {
354     absl::FPrintF(stderr, "%s\n", s.ToString());
355   }
356 }
357 
Validate(const Options & opts) const358 bool TFStats::Validate(const Options& opts) const {
359   if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) {
360     absl::FPrintF(stderr,
361                   "Options -step=%d not found.\nAvailable steps: ", opts.step);
362     for (int64 s : steps_) {
363       absl::FPrintF(stderr, "%d ", s);
364     }
365     absl::FPrintF(stderr, "\n");
366     return false;
367   }
368   return true;
369 }
370 
AddNodeForTest(int64 step,std::unique_ptr<TFGraphNode> node)371 void TFStats::AddNodeForTest(int64 step, std::unique_ptr<TFGraphNode> node) {
372   steps_.insert(step);
373   nodes_map_[node->name()] = std::move(node);
374 }
375 }  // namespace tfprof
376 }  // namespace tensorflow
377