1 /* Copyright 2016 The TensorFlow Authors All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
#include "tensorflow/core/profiler/internal/tfprof_show_multi.h"

#include <algorithm>
#include <memory>
#include <set>

#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/regexp.h"
#include "tensorflow/core/profiler/internal/tfprof_scope.h"
26 
27 namespace tensorflow {
28 namespace tfprof {
29 
Show(const string & prefix,const Options & opts)30 const MultiGraphNodeProto& TFMultiShow::Show(const string& prefix,
31                                              const Options& opts) {
32   if (opts.output_type == kOutput[0]) {
33     Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
34     return ShowInternal(opts, &timeline)->proto();
35   } else {
36     const ShowMultiNode* ret = ShowInternal(opts, nullptr);
37     if (opts.output_type == kOutput[1]) {
38       absl::PrintF("%s%s", prefix, ret->formatted_str);
39       fflush(stdout);
40     } else if (opts.output_type == kOutput[2]) {
41       Status s = WriteStringToFile(Env::Default(),
42                                    opts.output_options.at(kFileOpts[0]),
43                                    prefix + ret->formatted_str);
44       if (!s.ok()) {
45         absl::FPrintF(stderr, "%s\n", s.ToString());
46       }
47     } else if (opts.output_type == kOutput[3] ||
48                opts.output_type == kOutput[4]) {
49     } else {
50       absl::FPrintF(stderr, "Unknown output type: %s\n", opts.output_type);
51     }
52     return ret->proto();
53   }
54 }
55 
ShouldShow(const ShowMultiNode * node,const Options & opts,int depth) const56 bool TFMultiShow::ShouldShow(const ShowMultiNode* node, const Options& opts,
57                              int depth) const {
58   // Always show kTFProfRoot.
59   if (node->name() == kTFProfRoot) return true;
60 
61   // TODO(xpan): Think more carefully about node filtering in code view.
62   // Unlike graph/scope view, which users want to see the exact leaf op.
63   // In code view, users want to see the middle code traces they wrote.
64   //
65   // This is a subtle difference from scope/graph view. Usually mostly
66   // want to see the middle code traces (i.e. their own codes.), instead
67   // of the TensorFlow internal codes traces.
68   if (node->proto().total_requested_bytes() < opts.min_bytes ||
69       node->proto().total_peak_bytes() < opts.min_peak_bytes ||
70       node->proto().total_residual_bytes() < opts.min_residual_bytes ||
71       node->proto().total_output_bytes() < opts.min_output_bytes ||
72       node->proto().total_exec_micros() < opts.min_micros ||
73       node->proto().total_accelerator_exec_micros() <
74           opts.min_accelerator_micros ||
75       node->proto().total_cpu_exec_micros() < opts.min_cpu_micros ||
76       node->proto().total_parameters() < opts.min_params ||
77       node->proto().total_float_ops() < opts.min_float_ops ||
78       depth > opts.max_depth || !ShouldShowIfExtra(node, opts, depth)) {
79     return false;
80   }
81 
82   bool show = false;
83   if (opts.show_name_regexes.size() == 1 && opts.show_name_regexes[0] == ".*") {
84     show = true;
85   } else {
86     for (const string& regex : opts.show_name_regexes) {
87       if (RE2::FullMatch(node->name(), regex)) {
88         show = true;
89         break;
90       }
91     }
92   }
93   // Don't show if show_name_regexes don't cover it.
94   if (!show) return false;
95   // Don't show if hide_name_regexes cover it.
96   for (const string& regex : opts.hide_name_regexes) {
97     if (RE2::FullMatch(node->name(), regex)) return false;
98   }
99   return true;
100 }
101 
ShouldTrim(const ShowMultiNode * node,const std::vector<string> & regexes) const102 bool TFMultiShow::ShouldTrim(const ShowMultiNode* node,
103                              const std::vector<string>& regexes) const {
104   for (const string& regex : regexes) {
105     if (RE2::FullMatch(node->name(), regex)) {
106       return true;
107     }
108   }
109   return false;
110 }
111 
ReAccount(ShowMultiNode * node,const Options & opts)112 bool TFMultiShow::ReAccount(ShowMultiNode* node, const Options& opts) {
113   return node->ReInit(opts.step, opts.account_type_regexes);
114 }
115 
FormatLegend(const Options & opts) const116 string TFMultiShow::FormatLegend(const Options& opts) const {
117   std::vector<string> legends;
118   if (opts.select.find(kShown[0]) != opts.select.end()) {
119     legends.push_back("requested bytes");
120   }
121   if (opts.select.find(kShown[11]) != opts.select.end()) {
122     legends.push_back("peak bytes");
123   }
124   if (opts.select.find(kShown[12]) != opts.select.end()) {
125     legends.push_back("residual bytes");
126   }
127   if (opts.select.find(kShown[13]) != opts.select.end()) {
128     legends.push_back("output bytes");
129   }
130   if (opts.select.find(kShown[1]) != opts.select.end()) {
131     legends.push_back("total execution time");
132     legends.push_back("accelerator execution time");
133     legends.push_back("cpu execution time");
134   }
135   if (opts.select.find(kShown[9]) != opts.select.end() &&
136       opts.select.find(kShown[1]) == opts.select.end()) {
137     legends.push_back("accelerator execution time");
138   }
139   if (opts.select.find(kShown[10]) != opts.select.end() &&
140       opts.select.find(kShown[1]) == opts.select.end()) {
141     legends.push_back("cpu execution time");
142   }
143   if (opts.select.find(kShown[2]) != opts.select.end()) {
144     legends.push_back("# parameters");
145   }
146   if (opts.select.find(kShown[3]) != opts.select.end()) {
147     legends.push_back("# float_ops");
148   }
149   if (opts.select.find(kShown[5]) != opts.select.end()) {
150     legends.push_back("assigned devices");
151   }
152   if (opts.select.find(kShown[6]) != opts.select.end()) {
153     legends.push_back("op types");
154   }
155   if (opts.select.find(kShown[7]) != opts.select.end()) {
156     legends.push_back("op occurrence (run|defined)");
157   }
158   if (opts.select.find(kShown[8]) != opts.select.end()) {
159     legends.push_back("input shapes");
160   }
161   return absl::StrFormat("node name | %s\n", absl::StrJoin(legends, " | "));
162 }
163 
FormatInputShapes(const MultiGraphNodeProto & proto) const164 string TFMultiShow::FormatInputShapes(const MultiGraphNodeProto& proto) const {
165   // input_shape string -> (static defined count, run count, run_micros)
166   std::map<string, std::tuple<int64, int64, int64>> input_shapes_attr;
167   for (int i = 0; i < proto.graph_nodes_size(); ++i) {
168     const GraphNodeProto& gnode = proto.graph_nodes(i);
169     // Convert and sort by input_idx.
170     std::map<int, std::vector<int64>> input_shapes;
171     for (const auto& inp : gnode.input_shapes()) {
172       input_shapes[inp.first] = ShapeProtoToVec(inp.second);
173     }
174 
175     std::vector<string> input_vec;
176     for (const auto& s : input_shapes) {
177       if (s.second.empty()) {
178         input_vec.push_back(absl::StrFormat("%d:unknown", s.first));
179       } else {
180         input_vec.push_back(
181             absl::StrFormat("%d:%s", s.first, absl::StrJoin(s.second, "x")));
182       }
183     }
184     string shape_type_str =
185         absl::StrFormat("input_type: %s", absl::StrJoin(input_vec, ",\t"));
186     auto t = input_shapes_attr.find(shape_type_str);
187     if (t == input_shapes_attr.end()) {
188       input_shapes_attr.insert(
189           std::make_pair(shape_type_str, std::make_tuple(0, 0, 0)));
190       t = input_shapes_attr.find(shape_type_str);
191     }
192     input_shapes_attr[shape_type_str] = std::make_tuple(
193         std::get<0>(t->second) + 1, std::get<1>(t->second) + gnode.run_count(),
194         std::get<2>(t->second) + gnode.exec_micros());
195   }
196   if (input_shapes_attr.empty()) {
197     return "";
198   }
199 
200   std::vector<std::pair<string, std::tuple<int64, int64, int64>>>
201       shape_count_vec(input_shapes_attr.begin(), input_shapes_attr.end());
202   std::sort(
203       shape_count_vec.begin(), shape_count_vec.end(),
204       [](const std::pair<const string, std::tuple<int64, int64, int64>>& a,
205          const std::pair<const string, std::tuple<int64, int64, int64>>& b) {
206         return std::get<1>(a.second) > std::get<1>(b.second);
207       });
208 
209   std::vector<string> input_types;
210   input_types.reserve(shape_count_vec.size());
211   for (const auto& s : shape_count_vec) {
212     std::tuple<int64, int64, int64> t = s.second;
213     input_types.push_back(absl::StrFormat(
214         "%s\t(run*%d|defined*%d)\texec_time: %s", s.first, std::get<1>(t),
215         std::get<0>(t), FormatTime(std::get<2>(t))));
216   }
217   return absl::StrJoin(input_types, "\n");
218 }
219 
FormatTimes(const ShowMultiNode * node,const Options & opts) const220 std::vector<string> TFMultiShow::FormatTimes(const ShowMultiNode* node,
221                                              const Options& opts) const {
222   std::vector<string> attrs;
223   if (opts.select.find(kShown[1]) != opts.select.end()) {
224     attrs.push_back(FormatTotalExecTime(node, opts));
225     attrs.push_back(FormatAcceleratorExecTime(node, opts));
226     attrs.push_back(FormatCPUExecTime(node, opts));
227   }
228   if (opts.select.find(kShown[9]) != opts.select.end() &&
229       opts.select.find(kShown[1]) == opts.select.end()) {
230     attrs.push_back(FormatAcceleratorExecTime(node, opts));
231   }
232   if (opts.select.find(kShown[10]) != opts.select.end() &&
233       opts.select.find(kShown[1]) == opts.select.end()) {
234     attrs.push_back(FormatCPUExecTime(node, opts));
235   }
236   return attrs;
237 }
238 
239 }  // namespace tfprof
240 }  // namespace tensorflow
241