/* Copyright 2016 The TensorFlow Authors All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/profiler/internal/tfprof_stats.h"

#include <stdio.h>
#include <utility>

#include "tensorflow/core/framework/step_stats.pb.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/regexp.h"
#include "tensorflow/core/profiler/internal/tfprof_timeline.h"

namespace tensorflow {
namespace tfprof {
namespace {

const char* const kProfilePrefix = "Profile:\n";

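// Creates a placeholder NodeDef for nodes that appear only in RunMetadata
// (e.g. runtime-created nodes with no counterpart in the GraphDef).
// Returns false for nodes that should be skipped entirely.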
bool CreateRunMetadataNode(const string& name, NodeDef* def) {
  // TODO(xpan): Better solution than blacklisting these 2 nodes. They
  // actually cost some resources, so maybe they should be included. Some
  // nodes, such as _SOURCE, appear on multiple devices, which breaks
  // tfprof's assumption.
  if (name == "RecvTensor" || name == "_SOURCE" ||
      name.find("MEMCPY") != name.npos) {
    return false;
  }
  def->set_name(name);
  // TODO(xpan): Better operation type.
  // Sometimes a node doesn't have an op type, so we use the node name as
  // the op type instead.
  def->set_op(name);
  return true;
}
}  // namespace

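// Builds the profiling stats from in-memory protos. The GraphDef is
// required; RunMetadata, OpLogProto and the checkpoint reader are optional
// and enrich the node map when present.
//
// A minimal usage sketch (the proto-loading code and the Options value are
// assumed to exist elsewhere and are only placeholders here):
//   TFStats stats(std::move(graph), std::move(run_meta), std::move(op_log),
//                 std::move(ckpt_reader));
//   stats.BuildAllViews();
//   const GraphNodeProto& root = stats.ShowGraphNode(kCmds[0], opts);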
TFStats::TFStats(std::unique_ptr<GraphDef> graph,
                 std::unique_ptr<RunMetadata> run_meta,
                 std::unique_ptr<OpLogProto> op_log,
                 std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
    : has_code_traces_(false),
      miss_accelerator_stream_(false),
      ckpt_reader_(std::move(ckpt_reader)) {
  CHECK(graph) << "Must at least have GraphDef";

  printf("Parsing Inputs...\n");
  AddGraph(std::move(graph));
  if (run_meta && run_meta->has_step_stats()) {
    AddRunMeta(0, std::move(run_meta));
  }
  AddOpLogProto(std::move(op_log));

  if (ckpt_reader_) {
    for (const auto& v : ckpt_reader_->GetVariableToShapeMap()) {
      auto node = nodes_map_.find(v.first);
      if (node != nodes_map_.end()) {
        node->second->AddOpType("_checkpoint_variables");
      }
    }
  }
}

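// Restores profiling stats from a profile previously written with
// WriteProfile(). Reports an error and leaves the object empty if the file
// cannot be read or parsed.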
TFStats::TFStats(const string& filename,
                 std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
    : has_code_traces_(false),
      miss_accelerator_stream_(false),
      ckpt_reader_(std::move(ckpt_reader)) {
  string str;
  Status s = ReadFileToString(Env::Default(), filename, &str);
  if (!s.ok()) {
    fprintf(stderr, "Failed to read profile: %s", s.ToString().c_str());
    return;
  }

  ProfileProto profile;
  if (!profile.ParseFromString(str)) {
    fprintf(stderr, "Failed to parse profile\n");
    return;
  }
  for (const auto& entry : profile.id_to_string()) {
    id_to_string_[entry.first] = entry.second;
  }
  for (const auto& node_pb : profile.nodes()) {
    std::unique_ptr<TFGraphNode> node(
        new TFGraphNode(node_pb.second, profile, &id_to_string_, &nodes_map_));
    nodes_map_.insert(std::pair<string, std::unique_ptr<TFGraphNode>>(
        node_pb.second.name(), std::move(node)));
  }
  has_code_traces_ = profile.has_trace();
  for (int64 s : profile.steps()) {
    steps_.insert(s);
  }
}

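// Lazily builds the view backing the given command: kCmds[0] -> scope view,
// kCmds[1] -> graph view, kCmds[2] -> code view, kCmds[3] -> op view.
// A view is built only once and reused until AddGraph() invalidates it.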
void TFStats::BuildView(const string& cmd) {
  if (cmd == kCmds[0] && !scope_view_) {
    scope_view_.reset(new TFScope(ckpt_reader_.get()));
    for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
      scope_view_->AddNode(it->second.get());
    }
    scope_view_->Build();
  }
  if (cmd == kCmds[1] && !graph_view_) {
    graph_view_.reset(new TFGraph(ckpt_reader_.get()));
    for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
      graph_view_->AddNode(it->second.get());
    }
    graph_view_->Build();
  }
  if (cmd == kCmds[2] && !code_view_) {
    code_view_.reset(new TFCode());
    for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
      code_view_->AddNode(it->second.get());
    }
    code_view_->Build();
  }
  if (cmd == kCmds[3] && !op_view_) {
    op_view_.reset(new TFOp());
    for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
      op_view_->AddNode(it->second.get());
    }
    op_view_->Build();
  }
}

void TFStats::BuildAllViews() {
  std::vector<string> cmds_str(kCmds, kCmds + sizeof(kCmds) / sizeof(*kCmds));
  for (const string& cmd : cmds_str) {
    BuildView(cmd);
  }
}

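// Renders the scope view (kCmds[0]) or graph view (kCmds[1]). For the graph
// view, a negative opts.step combined with output type kOutput[0] renders the
// view once per recorded step. Returns an empty proto for unknown commands or
// invalid options.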
const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd,
                                             const Options& opts) const {
  if (!Validate(opts)) {
    return empty_graph_node_;
  }
  string prefix = MaybeReportMissingTrace();
  prefix += QueryDoc(cmd, opts) + kProfilePrefix;

  if (cmd == kCmds[0]) {
    return scope_view_->Show(prefix, opts);
  } else if (cmd == kCmds[1]) {
    if (opts.step < 0 && opts.output_type == kOutput[0]) {
      for (int64 step : steps_) {
        Options nopts = opts;
        nopts.step = step;
        graph_view_->Show(prefix, nopts);
      }
      return empty_graph_node_;
    }
    return graph_view_->Show(prefix, opts);
  } else {
    fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
    return empty_graph_node_;
  }
}

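// Renders the code view (kCmds[2]) or op view (kCmds[3]). The code view
// requires code traces from an OpLogProto; otherwise an empty proto is
// returned.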
const MultiGraphNodeProto& TFStats::ShowMultiGraphNode(
    const string& cmd, const Options& opts) const {
  if (!Validate(opts)) {
    return empty_multi_graph_node_;
  }
  string prefix = MaybeReportMissingTrace();
  prefix += QueryDoc(cmd, opts) + kProfilePrefix;

  if (cmd == kCmds[2]) {
    if (!has_code_traces()) {
      fprintf(stderr, "No code trace information\n");
      return empty_multi_graph_node_;
    }
    return code_view_->Show(prefix, opts);
  } else if (cmd == kCmds[3]) {
    return op_view_->Show(prefix, opts);
  } else {
    fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
    return empty_multi_graph_node_;
  }
}

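// Adds the nodes of a GraphDef to the node map and wires up their inputs.
// If any new node is added, previously built views are invalidated so that
// they are rebuilt on the next query.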
void TFStats::AddGraph(std::unique_ptr<GraphDef> graph) {
  std::map<string, const NodeDef*> node_defs;
  bool node_added = false;
  for (const NodeDef& node : graph->node()) {
    if (nodes_map_.find(node.name()) != nodes_map_.end()) {
      continue;
    }
    node_added = true;
    nodes_map_[node.name()] = std::unique_ptr<TFGraphNode>(
        new TFGraphNode(&node, nodes_map_.size(), &nodes_map_));
    node_defs[node.name()] = &node;
  }
  for (auto it = node_defs.begin(); it != node_defs.end(); it++) {
    TFGraphNode* node = nodes_map_.at(it->first).get();
    for (int i = 0; i < it->second->input_size(); ++i) {
      string node_input = it->second->input(i);
      int output_idx = 0;
      // The input name format can be "^node:src_output". If ":src_output" is
      // absent, this is assumed to be output 0 (not fully verified).
      auto prefix_pos = node_input.find(":");
      if (prefix_pos != node_input.npos) {
        std::vector<string> input_parts = str_util::Split(node_input, ":");
        CHECK(input_parts.size() == 2)
            << "Unknown NodeDef.input format: " << node_input;
        node_input = input_parts[0];
        CHECK(strings::safe_strto32(input_parts[1], &output_idx))
            << "Failed to parse integer: " << input_parts[1];
      }
      if (node_input.substr(0, 1) == "^") {
        node_input = node_input.substr(1);
      }
      // Delay input TFGraphNode retrieval as late as possible. In the long
      // run, once we have the TensorFlow runtime graph, the graph connections
      // should be dynamic and per-step.
      node->AddInput(node_input, output_idx, i);
    }
  }
  if (node_added) {
    graph_view_.reset(nullptr);
    scope_view_.reset(nullptr);
    op_view_.reset(nullptr);
    code_view_.reset(nullptr);
  }
}

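// Merges an OpLogProto into the node map: string table entries, user-defined
// op types, float_ops estimates and code traces are attached to the matching
// nodes.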
void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) {
  if (!op_log) {
    return;
  }
  for (const auto& entry : op_log->id_to_string()) {
    if (id_to_string_.find(entry.first) == id_to_string_.end()) {
      id_to_string_[entry.first] = entry.second;
    }
  }
  for (const OpLogEntry& entry : op_log->log_entries()) {
    auto node = nodes_map_.find(entry.name());
    if (node == nodes_map_.end()) continue;
    for (const string& type : entry.types()) {
      node->second->AddOpType(type);
    }
    if (entry.float_ops()) {
      node->second->AddFloatOps(entry.float_ops());
    }
    if (entry.has_code_def()) {
      has_code_traces_ = true;
      node->second->AddCode(entry.code_def(), &id_to_string_);
    }
  }
}

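// Attaches per-step execution statistics from a RunMetadata to the node map.
// Nodes that only exist in the RunMetadata (e.g. runtime-created ops) get a
// placeholder entry. Also records whether accelerator stream stats are
// missing even though accelerator-placed ops were found.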
void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) {
  if (!run_meta || !run_meta->has_step_stats()) {
    fprintf(stderr, "Invalid RunMetadata for step %lld\n", step);
    return;
  }
  steps_.insert(step);

  bool has_gpu_scheduling = false;
  bool has_gpu_stream = false;

  for (const auto& dev_stat : run_meta->step_stats().dev_stats()) {
    string dev = str_util::Lowercase(dev_stat.device());
    if (IsPlacedOnAccelerator(dev)) {
      has_gpu_scheduling = true;
      if (CountAsAcceleratorTime(dev)) {
        has_gpu_stream = true;
      }
    }
    for (const NodeExecStats& node_stat : dev_stat.node_stats()) {
      string name = node_stat.node_name();
      // Sometimes the node_name is suffixed with unnecessary information.
      auto split_pos = node_stat.node_name().find(":");
      if (split_pos != node_stat.node_name().npos) {
        name = node_stat.node_name().substr(0, split_pos);
      }
      auto node = nodes_map_.find(name);
      if (node == nodes_map_.end()) {
        NodeDef def;
        if (CreateRunMetadataNode(name, &def)) {
          nodes_map_[name] = std::unique_ptr<TFGraphNode>(
              new TFGraphNode(&def, nodes_map_.size(), &nodes_map_));
          nodes_map_.at(name)->AddStepStat(step, dev_stat.device(), node_stat);
        }
      } else {
        covered_nodes_.insert(node->second->id());
        node->second->AddStepStat(step, dev_stat.device(), node_stat);
      }
    }
  }

  if (has_gpu_scheduling && !has_gpu_stream) {
    miss_accelerator_stream_ = true;
  }
}

string TFStats::MaybeReportMissingTrace() const {
  string report = "";
  if (miss_accelerator_stream_) {
    report +=
        "\n\nFound accelerator operation but missing accelerator "
        "stream stats!\n\n"
        "It's likely a gpu tracing issue rather than a tf-profiler issue.\n"
        "If you find your operation missing accelerator time, "
        "consider filing a bug to xprof-dev@!\n\n";
  }
  return report;
}

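// Serializes the node map, string table, recorded steps and trace flags into
// a ProfileProto string, which can later be reloaded by the file-based
// constructor. Nodes with negative ids are skipped.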
void TFStats::SerializeToString(string* content) {
  ProfileProto profile;
  for (const auto& entry : id_to_string_) {
    (*profile.mutable_id_to_string())[entry.first] = entry.second;
  }
  for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
    if (it->second->id() < 0) {
      continue;
    }
    (*profile.mutable_nodes())[it->second->id()].MergeFrom(
        it->second->ToProto(nodes_map_));
  }

  profile.set_has_trace(has_code_traces_);
  profile.set_miss_accelerator_stream(miss_accelerator_stream_);
  for (int64 s : steps_) {
    profile.add_steps(s);
  }
  *content = profile.SerializeAsString();
}

void TFStats::WriteProfile(const string& filename) {
  string content;
  SerializeToString(&content);
  Status s = WriteStringToFile(Env::Default(), filename, content);
  if (!s.ok()) {
    fprintf(stderr, "%s\n", s.ToString().c_str());
  }
}

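// Checks that the requested step (if any) was actually recorded; prints the
// available steps otherwise.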
bool TFStats::Validate(const Options& opts) const {
  if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) {
    fprintf(stderr,
            "Options -step=%lld not found.\nAvailable steps: ", opts.step);
    for (int64 s : steps_) {
      fprintf(stderr, "%lld ", s);
    }
    fprintf(stderr, "\n");
    return false;
  }
  return true;
}

void TFStats::AddNodeForTest(int64 step, std::unique_ptr<TFGraphNode> node) {
  steps_.insert(step);
  nodes_map_[node->name()] = std::move(node);
}
}  // namespace tfprof
}  // namespace tensorflow