1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/util/stats_calculator.h"
17 
18 #include <iomanip>
19 #include <map>
20 #include <queue>
21 #include <sstream>
22 #include <string>
23 
24 namespace tensorflow {
25 
// Initializes options_ from `options`. GetOutputString() consults options_
// to decide which report sections to emit and how many rows each may contain.
StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
    : options_(options) {}
28 
GetShortSummary() const29 std::string StatsCalculator::GetShortSummary() const {
30   std::stringstream stream;
31   stream << "Timings (microseconds): ";
32   run_total_us_.OutputToStream(&stream);
33   stream << std::endl;
34 
35   stream << "Memory (bytes): ";
36   memory_.OutputToStream(&stream);
37   stream << std::endl;
38 
39   stream << details_.size() << " nodes observed" << std::endl;
40   return stream.str();
41 }
42 
// Starts a new tab-separated column on `stream`.
//
// Writes a single tab, then configures `stream` so that the next value
// inserted is right-aligned in a field of `width` characters and
// floating-point values are printed in fixed notation with three decimals.
// The width applies only to the next inserted value; the alignment, notation
// and precision settings remain in effect on `stream` afterwards.
//
// Returns `stream` so the column value can be chained onto the call.
std::ostream& InitField(std::ostream& stream, int width) {
  stream << "\t";

  stream.setf(std::ios_base::right, std::ios_base::adjustfield);
  stream.setf(std::ios_base::fixed, std::ios_base::floatfield);
  stream.precision(3);
  stream.width(width);

  return stream;
}
48 
HeaderString(const std::string & title) const49 std::string StatsCalculator::HeaderString(const std::string& title) const {
50   std::stringstream stream;
51 
52   stream << "============================== " << title
53          << " ==============================" << std::endl;
54 
55   InitField(stream, 24) << "[node type]";
56   InitField(stream, 17) << "[start]";
57   InitField(stream, 9) << "[first]";
58   InitField(stream, 9) << "[avg ms]";
59   InitField(stream, 8) << "[%]";
60   InitField(stream, 8) << "[cdf%]";
61   InitField(stream, 10) << "[mem KB]";
62   InitField(stream, 9) << "[times called]";
63   stream << "\t"
64          << "[Name]";
65   return stream.str();
66 }
67 
ColumnString(const Detail & detail,const int64_t cumulative_stat_on_node,const Stat<int64_t> & stat) const68 std::string StatsCalculator::ColumnString(const Detail& detail,
69                                           const int64_t cumulative_stat_on_node,
70                                           const Stat<int64_t>& stat) const {
71   const double start_ms = detail.start_us.avg() / 1000.0;
72   const double first_time_ms = detail.rel_end_us.first() / 1000.0;
73   const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
74   const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
75   const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
76   const int64_t times_called = detail.times_called / num_runs();
77 
78   std::stringstream stream;
79   InitField(stream, 24) << detail.type;
80   InitField(stream, 17) << start_ms;
81   InitField(stream, 9) << first_time_ms;
82   InitField(stream, 9) << avg_time_ms;
83   InitField(stream, 7) << percentage << "%";
84   InitField(stream, 7) << cdf_percentage << "%";
85   InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
86   InitField(stream, 9) << times_called;
87   stream << "\t" << detail.name;
88 
89   return stream.str();
90 }
91 
OrderNodesByMetric(SortingMetric metric,std::vector<const Detail * > * details) const92 void StatsCalculator::OrderNodesByMetric(
93     SortingMetric metric, std::vector<const Detail*>* details) const {
94   std::priority_queue<std::pair<std::string, const Detail*>> sorted_list;
95   const int num_nodes = details_.size();
96 
97   for (const auto& det : details_) {
98     const Detail* detail = &(det.second);
99     std::stringstream stream;
100     stream << std::setw(20) << std::right << std::setprecision(10)
101            << std::fixed;
102 
103     switch (metric) {
104       case BY_NAME:
105         stream << detail->name;
106         break;
107       case BY_RUN_ORDER:
108         stream << num_nodes - detail->run_order;
109         break;
110       case BY_TIME:
111         stream << detail->rel_end_us.avg();
112         break;
113       case BY_MEMORY:
114         stream << detail->mem_used.avg();
115         break;
116       case BY_TYPE:
117         stream << detail->type;
118         break;
119       default:
120         stream << "";
121         break;
122     }
123 
124     sorted_list.emplace(stream.str(), detail);
125   }
126 
127   while (!sorted_list.empty()) {
128     auto entry = sorted_list.top();
129     sorted_list.pop();
130     details->push_back(entry.second);
131   }
132 }
133 
ComputeStatsByType(std::map<std::string,int64_t> * node_type_map_count,std::map<std::string,int64_t> * node_type_map_time,std::map<std::string,int64_t> * node_type_map_memory,std::map<std::string,int64_t> * node_type_map_times_called,int64_t * accumulated_us) const134 void StatsCalculator::ComputeStatsByType(
135     std::map<std::string, int64_t>* node_type_map_count,
136     std::map<std::string, int64_t>* node_type_map_time,
137     std::map<std::string, int64_t>* node_type_map_memory,
138     std::map<std::string, int64_t>* node_type_map_times_called,
139     int64_t* accumulated_us) const {
140   int64_t run_count = run_total_us_.count();
141 
142   for (const auto& det : details_) {
143     const std::string node_name = det.first;
144     const Detail& detail = det.second;
145 
146     int64_t curr_time_val =
147         static_cast<int64_t>(detail.rel_end_us.sum() / run_count);
148     *accumulated_us += curr_time_val;
149 
150     int64_t curr_memory_val = detail.mem_used.newest();
151 
152     const std::string& node_type = detail.type;
153 
154     (*node_type_map_count)[node_type] += 1;
155     (*node_type_map_time)[node_type] += curr_time_val;
156     (*node_type_map_memory)[node_type] += curr_memory_val;
157     (*node_type_map_times_called)[node_type] += detail.times_called / run_count;
158   }
159 }
160 
GetStatsByNodeType() const161 std::string StatsCalculator::GetStatsByNodeType() const {
162   std::stringstream stream;
163 
164   stream << "Number of nodes executed: " << details_.size() << std::endl;
165 
166   stream << "============================== Summary by node type "
167             "=============================="
168          << std::endl;
169 
170   std::map<std::string, int64_t> node_type_map_count;
171   std::map<std::string, int64_t> node_type_map_time;
172   std::map<std::string, int64_t> node_type_map_memory;
173   std::map<std::string, int64_t> node_type_map_times_called;
174   int64_t accumulated_us = 0;
175 
176   ComputeStatsByType(&node_type_map_count, &node_type_map_time,
177                      &node_type_map_memory, &node_type_map_times_called,
178                      &accumulated_us);
179 
180   // Sort them.
181   std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>>
182       timings;
183   for (const auto& node_type : node_type_map_time) {
184     const int64_t mem_used = node_type_map_memory[node_type.first];
185     timings.emplace(node_type.second,
186                     std::pair<std::string, int64_t>(node_type.first, mem_used));
187   }
188 
189   InitField(stream, 24) << "[Node type]";
190   InitField(stream, 9) << "[count]";
191   InitField(stream, 10) << "[avg ms]";
192   InitField(stream, 11) << "[avg %]";
193   InitField(stream, 11) << "[cdf %]";
194   InitField(stream, 10) << "[mem KB]";
195   InitField(stream, 10) << "[times called]";
196   stream << std::endl;
197 
198   float cdf = 0.0f;
199   while (!timings.empty()) {
200     auto entry = timings.top();
201     timings.pop();
202 
203     const std::string node_type = entry.second.first;
204     const float memory = entry.second.second / 1000.0f;
205 
206     const int64_t node_type_total_us = entry.first;
207     const float time_per_run_ms = node_type_total_us / 1000.0f;
208 
209     const float percentage =
210         ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
211     cdf += percentage;
212 
213     InitField(stream, 24) << node_type;
214     InitField(stream, 9) << node_type_map_count[node_type];
215     InitField(stream, 10) << time_per_run_ms;
216     InitField(stream, 10) << percentage << "%";
217     InitField(stream, 10) << cdf << "%";
218     InitField(stream, 10) << memory;
219     InitField(stream, 9) << node_type_map_times_called[node_type];
220     stream << std::endl;
221   }
222   stream << std::endl;
223   return stream.str();
224 }
225 
GetStatsByMetric(const std::string & title,SortingMetric sorting_metric,int num_stats) const226 std::string StatsCalculator::GetStatsByMetric(const std::string& title,
227                                               SortingMetric sorting_metric,
228                                               int num_stats) const {
229   std::vector<const Detail*> details;
230   OrderNodesByMetric(sorting_metric, &details);
231 
232   double cumulative_stat_on_node = 0;
233 
234   std::stringstream stream;
235   stream << HeaderString(title) << std::endl;
236   int stat_num = 0;
237   for (auto detail : details) {
238     ++stat_num;
239     if (num_stats > 0 && stat_num > num_stats) {
240       break;
241     }
242 
243     // TODO(andrewharp): Make this keep track of the particular metric for cdf.
244     cumulative_stat_on_node += detail->rel_end_us.sum();
245     stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
246            << std::endl;
247   }
248   stream << std::endl;
249   return stream.str();
250 }
251 
GetOutputString() const252 std::string StatsCalculator::GetOutputString() const {
253   std::stringstream stream;
254   if (options_.show_run_order) {
255     stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
256                                options_.run_order_limit);
257   }
258   if (options_.show_time) {
259     stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
260                                options_.time_limit);
261   }
262   if (options_.show_memory) {
263     stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
264                                options_.memory_limit);
265   }
266   if (options_.show_type) {
267     stream << GetStatsByNodeType();
268   }
269   if (options_.show_summary) {
270     stream << GetShortSummary() << std::endl;
271   }
272   return stream.str();
273 }
274 
AddNodeStats(const std::string & name,const std::string & type,int64_t run_order,int64_t start_us,int64_t rel_end_us,int64_t mem_used)275 void StatsCalculator::AddNodeStats(const std::string& name,
276                                    const std::string& type, int64_t run_order,
277                                    int64_t start_us, int64_t rel_end_us,
278                                    int64_t mem_used) {
279   Detail* detail = nullptr;
280   if (details_.find(name) == details_.end()) {
281     details_.insert({name, {}});
282     detail = &details_.at(name);
283     detail->type = type;
284     detail->name = name;
285     detail->run_order = run_order;
286   } else {
287     detail = &details_.at(name);
288   }
289   detail->start_us.UpdateStat(start_us);
290   detail->rel_end_us.UpdateStat(rel_end_us);
291   detail->mem_used.UpdateStat(mem_used);
292   detail->times_called++;
293 }
294 
295 }  // namespace tensorflow
296