1 /* Copyright 2016 The TensorFlow Authors All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/core/profiler/internal/tfprof_node_show.h"
16 
17 namespace tensorflow {
18 namespace tfprof {
19 namespace {}
20 
ShowNode(const TFGraphNode * node)21 ShowNode::ShowNode(const TFGraphNode* node) : node(node), account(false) {
22   ReInit(-1);
23 }
24 
ReInit(int64 step)25 void ShowNode::ReInit(int64 step) {
26   mutable_proto()->set_name(name());
27   mutable_proto()->clear_devices();
28   if (!node->canonical_device().empty()) {
29     mutable_proto()->add_devices(node->canonical_device());
30   }
31   mutable_proto()->set_run_count(node->run_count(step));
32   mutable_proto()->set_exec_micros(node->exec_micros(step));
33   mutable_proto()->set_accelerator_exec_micros(
34       node->accelerator_exec_micros(step));
35   mutable_proto()->set_cpu_exec_micros(node->cpu_exec_micros(step));
36 
37   mutable_proto()->set_requested_bytes(node->requested_bytes(step));
38   mutable_proto()->set_peak_bytes(node->peak_bytes(step));
39   mutable_proto()->set_residual_bytes(node->residual_bytes(step));
40   mutable_proto()->set_output_bytes(node->output_bytes(step));
41 
42   mutable_proto()->set_float_ops(node->float_ops(step));
43 
44   mutable_proto()->clear_input_shapes();
45   for (const auto& inp : node->input_shapes()) {
46     (*mutable_proto()->mutable_input_shapes())[inp.first].MergeFrom(
47         VecToShapeProto(inp.second));
48   }
49   proto_.set_parameters(node->parameters());
50 }
51 
mutable_proto()52 GraphNodeProto* ShowNode::mutable_proto() { return &proto_; }
53 
proto() const54 const GraphNodeProto& ShowNode::proto() const { return proto_; }
55 
AggregateTotalStats(ShowNode * node)56 void ShowNode::AggregateTotalStats(ShowNode* node) {
57   GraphNodeProto* node_pb = node->mutable_proto();
58   mutable_proto()->set_total_run_count(proto().total_run_count() +
59                                        node_pb->total_run_count());
60   mutable_proto()->set_total_definition_count(
61       proto().total_definition_count() + node_pb->total_definition_count());
62   mutable_proto()->set_total_exec_micros(proto().total_exec_micros() +
63                                          node_pb->total_exec_micros());
64   mutable_proto()->set_total_accelerator_exec_micros(
65       proto().total_accelerator_exec_micros() +
66       node_pb->total_accelerator_exec_micros());
67   mutable_proto()->set_total_cpu_exec_micros(proto().total_cpu_exec_micros() +
68                                              node_pb->total_cpu_exec_micros());
69 
70   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
71                                              node_pb->total_requested_bytes());
72   mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
73                                         node_pb->total_peak_bytes());
74   mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
75                                             node_pb->total_residual_bytes());
76   mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
77                                           node_pb->total_output_bytes());
78   mutable_proto()->set_total_parameters(proto().total_parameters() +
79                                         node_pb->total_parameters());
80   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
81                                        node_pb->total_float_ops());
82 }
83 
AddSelfToTotalStats()84 void ShowNode::AddSelfToTotalStats() {
85   mutable_proto()->set_total_definition_count(proto().total_definition_count() +
86                                               1);
87   mutable_proto()->set_total_run_count(proto().total_run_count() +
88                                        proto().run_count());
89   mutable_proto()->set_total_exec_micros(proto().total_exec_micros() +
90                                          proto().exec_micros());
91   mutable_proto()->set_total_accelerator_exec_micros(
92       proto().total_accelerator_exec_micros() +
93       proto().accelerator_exec_micros());
94   mutable_proto()->set_total_cpu_exec_micros(proto().total_cpu_exec_micros() +
95                                              proto().cpu_exec_micros());
96 
97   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
98                                              proto().requested_bytes());
99   mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
100                                         proto().peak_bytes());
101   mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
102                                             proto().residual_bytes());
103   mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
104                                           proto().output_bytes());
105 
106   mutable_proto()->set_total_parameters(proto().total_parameters() +
107                                         proto().parameters());
108   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
109                                        proto().float_ops());
110 }
111 
ResetTotalStats()112 void ShowNode::ResetTotalStats() {
113   formatted_str.clear();
114 
115   mutable_proto()->set_total_definition_count(0);
116   mutable_proto()->set_total_run_count(0);
117   mutable_proto()->set_total_exec_micros(0);
118   mutable_proto()->set_total_accelerator_exec_micros(0);
119   mutable_proto()->set_total_cpu_exec_micros(0);
120 
121   mutable_proto()->set_total_requested_bytes(0);
122   mutable_proto()->set_total_peak_bytes(0);
123   mutable_proto()->set_total_residual_bytes(0);
124   mutable_proto()->set_total_output_bytes(0);
125 
126   mutable_proto()->set_total_parameters(0);
127   mutable_proto()->set_total_float_ops(0);
128   mutable_proto()->mutable_children()->Clear();
129 }
130 
ShowMultiNode(TFMultiGraphNode * node)131 ShowMultiNode::ShowMultiNode(TFMultiGraphNode* node)
132     : node(node), account(false), show(false) {
133   ReInit(-1, {".*"});
134 }
135 
ReInit(int64 step,const std::vector<string> & type_regexes)136 bool ShowMultiNode::ReInit(int64 step,
137                            const std::vector<string>& type_regexes) {
138   bool has_matched_type = node->SnapshotNodes(step, type_regexes);
139 
140   std::vector<ShowNode> snodes;
141   mutable_proto()->mutable_graph_nodes()->Clear();
142   for (const auto& it : node->graph_nodes()) {
143     ShowNode snode(it.second);
144     snodes.push_back(snode);
145     snodes.back().ReInit(step);
146     snodes.back().AddSelfToTotalStats();
147     mutable_proto()->add_graph_nodes()->MergeFrom(snodes.back().proto());
148   }
149 
150   mutable_proto()->set_name(name());
151   mutable_proto()->set_exec_micros(node->exec_micros());
152   mutable_proto()->set_accelerator_exec_micros(node->accelerator_exec_micros());
153   mutable_proto()->set_cpu_exec_micros(node->cpu_exec_micros());
154 
155   mutable_proto()->set_requested_bytes(node->requested_bytes());
156   mutable_proto()->set_peak_bytes(node->peak_bytes());
157   mutable_proto()->set_residual_bytes(node->residual_bytes());
158   mutable_proto()->set_output_bytes(node->output_bytes());
159 
160   mutable_proto()->set_float_ops(node->float_ops());
161 
162   mutable_proto()->set_parameters(node->parameters());
163   return has_matched_type;
164 }
165 
mutable_proto()166 MultiGraphNodeProto* ShowMultiNode::mutable_proto() { return &proto_; }
167 
proto() const168 const MultiGraphNodeProto& ShowMultiNode::proto() const { return proto_; }
169 
AggregateTotalStats(ShowMultiNode * node)170 void ShowMultiNode::AggregateTotalStats(ShowMultiNode* node) {
171   MultiGraphNodeProto* node_pb = node->mutable_proto();
172   mutable_proto()->set_total_exec_micros(proto().total_exec_micros() +
173                                          node_pb->total_exec_micros());
174   mutable_proto()->set_total_accelerator_exec_micros(
175       proto().total_accelerator_exec_micros() +
176       node_pb->total_accelerator_exec_micros());
177   mutable_proto()->set_total_cpu_exec_micros(proto().total_cpu_exec_micros() +
178                                              node_pb->total_cpu_exec_micros());
179 
180   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
181                                              node_pb->total_requested_bytes());
182   mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
183                                         node_pb->total_peak_bytes());
184   mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
185                                             node_pb->total_residual_bytes());
186   mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
187                                           node_pb->total_output_bytes());
188 
189   mutable_proto()->set_total_parameters(proto().total_parameters() +
190                                         node_pb->total_parameters());
191   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
192                                        node_pb->total_float_ops());
193 }
194 
AddSelfToTotalStats()195 void ShowMultiNode::AddSelfToTotalStats() {
196   mutable_proto()->set_total_exec_micros(proto().total_exec_micros() +
197                                          proto().exec_micros());
198   mutable_proto()->set_total_accelerator_exec_micros(
199       proto().total_accelerator_exec_micros() +
200       proto().accelerator_exec_micros());
201   mutable_proto()->set_total_cpu_exec_micros(proto().total_cpu_exec_micros() +
202                                              proto().cpu_exec_micros());
203 
204   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
205                                              proto().requested_bytes());
206   mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
207                                         proto().peak_bytes());
208   mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
209                                             proto().residual_bytes());
210   mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
211                                           proto().output_bytes());
212 
213   mutable_proto()->set_total_parameters(proto().total_parameters() +
214                                         proto().parameters());
215   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
216                                        proto().float_ops());
217 }
218 
ResetTotalStats()219 void ShowMultiNode::ResetTotalStats() {
220   formatted_str.clear();
221   mutable_proto()->set_total_exec_micros(0);
222   mutable_proto()->set_total_accelerator_exec_micros(0);
223   mutable_proto()->set_total_cpu_exec_micros(0);
224 
225   mutable_proto()->set_total_requested_bytes(0);
226   mutable_proto()->set_total_peak_bytes(0);
227   mutable_proto()->set_total_residual_bytes(0);
228   mutable_proto()->set_total_output_bytes(0);
229 
230   mutable_proto()->set_total_parameters(0);
231   mutable_proto()->set_total_float_ops(0);
232   mutable_proto()->mutable_children()->Clear();
233 }
234 
235 }  // namespace tfprof
236 }  // namespace tensorflow
237