1 /* Copyright 2016 The TensorFlow Authors All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/profiler/internal/tfprof_op.h"
17
18 #include <stdio.h>
19
20 #include <utility>
21
22 #include "absl/strings/str_cat.h"
23 #include "absl/strings/str_format.h"
24 #include "tensorflow/core/platform/regexp.h"
25 #include "tensorflow/core/profiler/internal/tfprof_constants.h"
26 #include "tensorflow/core/profiler/internal/tfprof_tensor.h"
27
28 namespace tensorflow {
29 namespace tfprof {
30 namespace {
FormatToalExecTime(const ShowMultiNode * node,const ShowMultiNode * root)31 string FormatToalExecTime(const ShowMultiNode* node,
32 const ShowMultiNode* root) {
33 double accu_pct = 0.0;
34 double pct = 0.0;
35 if (node->proto().total_exec_micros() > 0) {
36 accu_pct = 100.0 * node->proto().total_exec_micros() /
37 root->proto().total_exec_micros();
38 pct =
39 100.0 * node->proto().exec_micros() / root->proto().total_exec_micros();
40 }
41
42 return absl::StrFormat(
43 "%30s",
44 absl::StrFormat("%s (%.2f%%, %.2f%%)",
45 FormatTime(node->proto().exec_micros()), accu_pct, pct));
46 }
FormatCPUExecTime(const ShowMultiNode * node,const ShowMultiNode * root)47 string FormatCPUExecTime(const ShowMultiNode* node, const ShowMultiNode* root) {
48 double accu_pct = 0.0;
49 double pct = 0.0;
50 if (node->proto().total_cpu_exec_micros() > 0) {
51 accu_pct = 100.0 * node->proto().total_cpu_exec_micros() /
52 root->proto().total_cpu_exec_micros();
53 pct = 100.0 * node->proto().cpu_exec_micros() /
54 root->proto().total_cpu_exec_micros();
55 }
56
57 return absl::StrFormat(
58 "%30s", absl::StrFormat("%s (%.2f%%, %.2f%%)",
59 FormatTime(node->proto().cpu_exec_micros()),
60 accu_pct, pct));
61 }
FormatAcceleratorExecTime(const ShowMultiNode * node,const ShowMultiNode * root)62 string FormatAcceleratorExecTime(const ShowMultiNode* node,
63 const ShowMultiNode* root) {
64 double accu_pct = 0.0;
65 double pct = 0.0;
66 if (node->proto().total_accelerator_exec_micros() > 0) {
67 accu_pct = 100.0 * node->proto().total_accelerator_exec_micros() /
68 root->proto().total_accelerator_exec_micros();
69 pct = 100.0 * node->proto().accelerator_exec_micros() /
70 root->proto().total_accelerator_exec_micros();
71 }
72
73 return absl::StrFormat(
74 "%30s",
75 absl::StrFormat("%s (%.2f%%, %.2f%%)",
76 FormatTime(node->proto().accelerator_exec_micros()),
77 accu_pct, pct));
78 }
79 } // namespace
80
AddNode(TFGraphNode * node)81 void TFOp::AddNode(TFGraphNode* node) {
82 const string& op = node->op();
83 if (tfcnodes_map_.find(op) == tfcnodes_map_.end()) {
84 tfcnodes_map_[op] =
85 std::unique_ptr<TFMultiGraphNode>(new TFMultiGraphNode(op));
86 }
87 TFMultiGraphNode* tfcnode = tfcnodes_map_[op].get();
88 tfcnode->AddGraphNode(node);
89 }
90
Build()91 void TFOp::Build() {
92 for (auto& tn : tfcnodes_map_) {
93 cnodes_map_[tn.first] =
94 std::unique_ptr<OpNode>(new OpNode(tn.second.get()));
95 }
96
97 tfcnodes_map_[kTFProfRoot] =
98 std::unique_ptr<TFMultiGraphNode>(new TFMultiGraphNode(kTFProfRoot));
99 root_.reset(new OpNode(tfcnodes_map_[kTFProfRoot].get()));
100 }
101
ShowInternal(const Options & opts,Timeline * timeline)102 const ShowMultiNode* TFOp::ShowInternal(const Options& opts,
103 Timeline* timeline) {
104 root_->ResetTotalStats();
105 if (opts.output_type == kOutput[3]) {
106 absl::FPrintF(stderr, "Only 'code' view supports pprof output now.\n");
107 return root_.get();
108 }
109 if (opts.output_type == kOutput[1] || opts.output_type == kOutput[2]) {
110 root_->formatted_str = FormatNode(root_.get(), root_.get(), opts);
111 }
112 if (timeline) {
113 absl::FPrintF(stderr,
114 "op view doesn't support timeline yet. "
115 "Consider graph/scope/code view.\n");
116 return root_.get();
117 }
118 if (cnodes_map_.empty()) {
119 return root_.get();
120 }
121
122 std::vector<OpNode*> nodes;
123 for (auto& n : cnodes_map_) {
124 n.second->account = ReAccount(n.second.get(), opts);
125 n.second->ResetTotalStats();
126 n.second->AddSelfToTotalStats();
127 nodes.push_back(n.second.get());
128 }
129 nodes = SortNodes(nodes, opts);
130 // pre keeps track of previous visited node.
131 OpNode* pre = nullptr;
132 std::vector<OpNode*> account_nodes;
133 for (auto it = nodes.rbegin(); it != nodes.rend(); ++it) {
134 if ((*it)->account) {
135 if (pre) (*it)->AggregateTotalStats(pre);
136 account_nodes.push_back(*it);
137 pre = *it;
138 }
139 }
140 std::reverse(std::begin(account_nodes), std::end(account_nodes));
141 if (pre) {
142 root_->AggregateTotalStats(pre);
143 }
144
145 // Perform the display and optionally redo accounting.
146 int64 depth = 0;
147 std::vector<OpNode*> show_nodes;
148 int64 start = SearchRoot(account_nodes, opts.start_name_regexes);
149 for (int64 i = start, end = account_nodes.size(); i < end; ++i, ++depth) {
150 OpNode* n = account_nodes[i];
151 if (ShouldTrim(n, opts.trim_name_regexes) || depth > opts.max_depth) {
152 break;
153 }
154 n->show = ShouldShow(n, opts, depth);
155 if (n->show) show_nodes.push_back(n);
156 }
157
158 pre = nullptr;
159 for (auto it = show_nodes.rbegin(); it != show_nodes.rend(); ++it) {
160 if (opts.account_displayed_op_only) {
161 (*it)->ResetTotalStats();
162 (*it)->AddSelfToTotalStats();
163 if (pre) (*it)->AggregateTotalStats(pre);
164 }
165 pre = *it;
166 }
167 if (opts.account_displayed_op_only) {
168 root_->ResetTotalStats();
169 if (pre) {
170 root_->AggregateTotalStats(pre);
171 }
172 }
173 if (opts.output_type == kOutput[1] || opts.output_type == kOutput[2]) {
174 string display_str = FormatLegend(opts);
175 for (OpNode* node : show_nodes) {
176 display_str += FormatNode(node, root_.get(), opts);
177 }
178 // In op view, we don't show root (total). But it will still in proto.
179 // TODO(xpan): Is it the right choice?
180 root_->formatted_str = display_str;
181 }
182 // Populate the children field.
183 auto* pre_pb = root_->mutable_proto();
184 for (auto& show_node : show_nodes) {
185 pre_pb->clear_children();
186 pre_pb->add_children()->Swap(show_node->mutable_proto());
187 pre_pb = pre_pb->mutable_children(0);
188 }
189 return root_.get();
190 }
191
SearchRoot(const std::vector<OpNode * > nodes,const std::vector<string> & regexes)192 int64 TFOp::SearchRoot(const std::vector<OpNode*> nodes,
193 const std::vector<string>& regexes) {
194 if (regexes.empty() || (regexes.size() == 1 && regexes[0] == ".*")) {
195 return 0;
196 }
197 int64 i = 0;
198 const int64 nodes_size = nodes.size();
199 for (; i < nodes_size; ++i) {
200 for (const string& regex : regexes) {
201 if (RE2::FullMatch(nodes[i]->name(), regex)) {
202 return i;
203 }
204 }
205 }
206 return i;
207 }
208
FormatMemoryNode(int64 node_total_bytes,int64 root_total_bytes,int64 node_bytes) const209 string TFOp::FormatMemoryNode(int64 node_total_bytes, int64 root_total_bytes,
210 int64 node_bytes) const {
211 double accu_pct = 0.0;
212 double pct = 0.0;
213 if (node_bytes > 0) {
214 accu_pct = 100.0 * node_total_bytes / root_total_bytes;
215 pct = 100.0 * node_bytes / root_total_bytes;
216 }
217 return absl::StrFormat(
218 "%30s", absl::StrFormat("%s (%.2f%%, %.2f%%)", FormatMemory(node_bytes),
219 accu_pct, pct));
220 }
221
FormatNode(OpNode * node,OpNode * root,const Options & opts) const222 string TFOp::FormatNode(OpNode* node, OpNode* root, const Options& opts) const {
223 std::vector<string> attrs;
224
225 if (opts.select.find(kShown[0]) != opts.select.end()) {
226 attrs.push_back(FormatMemoryNode(node->proto().total_requested_bytes(),
227 root->proto().total_requested_bytes(),
228 node->proto().requested_bytes()));
229 }
230
231 if (opts.select.find(kShown[11]) != opts.select.end()) {
232 attrs.push_back(FormatMemoryNode(node->proto().total_peak_bytes(),
233 root->proto().total_peak_bytes(),
234 node->proto().peak_bytes()));
235 }
236
237 if (opts.select.find(kShown[12]) != opts.select.end()) {
238 attrs.push_back(FormatMemoryNode(node->proto().total_residual_bytes(),
239 root->proto().total_residual_bytes(),
240 node->proto().residual_bytes()));
241 }
242 if (opts.select.find(kShown[13]) != opts.select.end()) {
243 attrs.push_back(FormatMemoryNode(node->proto().total_output_bytes(),
244 root->proto().total_output_bytes(),
245 node->proto().output_bytes()));
246 }
247
248 if (opts.select.find(kShown[1]) != opts.select.end()) {
249 attrs.push_back(FormatToalExecTime(node, root));
250 attrs.push_back(FormatAcceleratorExecTime(node, root));
251 attrs.push_back(FormatCPUExecTime(node, root));
252 }
253 if (opts.select.find(kShown[9]) != opts.select.end() &&
254 opts.select.find(kShown[1]) == opts.select.end()) {
255 attrs.push_back(FormatAcceleratorExecTime(node, root));
256 }
257 if (opts.select.find(kShown[10]) != opts.select.end() &&
258 opts.select.find(kShown[1]) == opts.select.end()) {
259 attrs.push_back(FormatCPUExecTime(node, root));
260 }
261 if (opts.select.find(kShown[2]) != opts.select.end()) {
262 double accu_pct = 0.0;
263 double pct = 0.0;
264 if (node->proto().total_parameters() > 0) {
265 accu_pct = 100.0 * node->proto().total_parameters() /
266 root->proto().total_parameters();
267 pct =
268 100.0 * node->proto().parameters() / root->proto().total_parameters();
269 }
270 attrs.push_back(absl::StrFormat(
271 "%30s", absl::StrFormat("%s params (%.2f%%, %.2f%%)",
272 FormatNumber(node->proto().parameters()),
273 accu_pct, pct)));
274 }
275
276 if (opts.select.find(kShown[3]) != opts.select.end()) {
277 double accu_pct = 0.0;
278 double pct = 0.0;
279 if (node->proto().total_float_ops() > 0) {
280 accu_pct = 100.0 * node->proto().total_float_ops() /
281 root->proto().total_float_ops();
282 pct = 100.0 * node->proto().float_ops() / root->proto().total_float_ops();
283 }
284
285 attrs.push_back(absl::StrFormat(
286 "%30s", absl::StrFormat("%s float_ops (%.2f%%, %.2f%%)",
287 FormatNumber(node->proto().float_ops()),
288 accu_pct, pct)));
289 }
290
291 if (opts.select.find(kShown[5]) != opts.select.end()) {
292 attrs.push_back(absl::StrJoin(node->node->devices(), "|"));
293 }
294
295 if (opts.select.find(kShown[6]) != opts.select.end()) {
296 std::set<string> op_types = node->node->op_types();
297 attrs.push_back(absl::StrJoin(op_types, "|"));
298 }
299
300 if (opts.select.find(kShown[7]) != opts.select.end()) {
301 int64 total_runs = 0;
302 for (const auto& gnode : node->proto().graph_nodes()) {
303 total_runs += gnode.run_count();
304 }
305 attrs.push_back(absl::StrFormat(
306 "%10s", absl::StrFormat("%d|%d", total_runs,
307 node->proto().graph_nodes_size())));
308 }
309
310 string node_str =
311 absl::StrFormat("%-25s%s\n", node->name(), absl::StrJoin(attrs, ", "));
312
313 if (opts.select.find(kShown[8]) != opts.select.end()) {
314 string input_shape_str = FormatInputShapes(node->proto());
315 if (!input_shape_str.empty()) {
316 node_str = absl::StrFormat("%s\n%s\n\n", node_str, input_shape_str);
317 }
318 }
319 return node_str;
320 }
321 } // namespace tfprof
322 } // namespace tensorflow
323