1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
17 #define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
18 
19 #include <stdlib.h>
20 
21 #include <algorithm>
22 #include <cmath>
23 #include <limits>
24 #include <map>
25 #include <sstream>
26 #include <string>
27 #include <vector>
28 
29 #include "tensorflow/core/util/stat_summarizer_options.h"
30 
31 namespace tensorflow {
32 
// Accumulates running summary statistics over a stream of samples fed in via
// UpdateStat(): first and newest sample, min, max, count, sum and sum of
// squares, from which average, variance and standard deviation are derived.
//
// ValueType is the type of an individual sample. HighPrecisionValueType is the
// type used to accumulate the sum of squares and to compute the average.
template <typename ValueType, typename HighPrecisionValueType = double>
class Stat {
 public:
  // Records one more sample `v` and updates every accumulated quantity.
  void UpdateStat(ValueType v) {
    if (count_ == 0) {
      first_ = v;
    }

    newest_ = v;
    max_ = std::max(v, max_);
    min_ = std::min(v, min_);
    ++count_;
    sum_ += v;
    // Widen before multiplying so the square is computed in high precision.
    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
  }

  // Discards all recorded samples, restoring the default-constructed state.
  void Reset() { *this = Stat(); }

  // True when no sample has been recorded.
  bool empty() const { return count_ == 0; }

  // First sample recorded (0 if empty).
  ValueType first() const { return first_; }

  // Most recently recorded sample (0 if empty).
  ValueType newest() const { return newest_; }

  // Largest sample recorded. If empty, this is the lowest representable
  // ValueType.
  ValueType max() const { return max_; }

  // Smallest sample recorded. If empty, this is the largest representable
  // ValueType.
  ValueType min() const { return min_; }

  // Number of samples recorded.
  int64_t count() const { return count_; }

  // Sum of all samples, accumulated in ValueType.
  ValueType sum() const { return sum_; }

  // Sum of the squares of all samples, accumulated in HighPrecisionValueType.
  HighPrecisionValueType squared_sum() const { return squared_sum_; }

  // True when there are no samples or every sample has the same value.
  bool all_same() const { return (count_ == 0 || min_ == max_); }

  // Returns the arithmetic mean of the samples.
  //
  // When empty, returns std::numeric_limits<ValueType>::quiet_NaN(); note that
  // for a ValueType without a NaN representation (e.g. integers) that value is
  // 0, not NaN.
  HighPrecisionValueType avg() const {
    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
                   : static_cast<HighPrecisionValueType>(sum_) / count_;
  }

  // Returns sample variance (normalized by count - 1), converted to ValueType.
  // Returns 0 when there are fewer than two distinct samples.
  ValueType sample_variance() const {
    // all_same() also covers count_ <= 1, so (count_ - 1) below is never zero.
    if (all_same()) return 0;
    const HighPrecisionValueType total =
        static_cast<HighPrecisionValueType>(sum_);
    const HighPrecisionValueType result =
        (squared_sum_ - total * total / count_) / (count_ - 1);
    // Rounding in the subtraction can yield a tiny negative value; a variance
    // is never negative, so clamp at zero.
    return static_cast<ValueType>(std::max<HighPrecisionValueType>(result, 0));
  }

  // Returns population variance (normalized by count), converted to ValueType.
  // Returns 0 when there are fewer than two distinct samples.
  ValueType variance() const {
    if (all_same()) return 0;
    const HighPrecisionValueType mean = avg();
    const HighPrecisionValueType result =
        (squared_sum_ / count_) - (mean * mean);
    // Clamp so that std_deviation() never takes the square root of a negative
    // number produced by floating-point cancellation.
    return static_cast<ValueType>(std::max<HighPrecisionValueType>(result, 0));
  }

  // Returns population stddev, i.e. the square root of variance().
  ValueType std_deviation() const {
    return all_same() ? 0 : std::sqrt(variance());
  }

  // Writes a one-line human-readable summary to `*stream`:
  //   - "count=0" when empty;
  //   - count and current value when all samples are equal;
  //   - count, first, current, min, max, avg and std otherwise.
  void OutputToStream(std::ostream* stream) const {
    if (empty()) {
      *stream << "count=0";
    } else if (all_same()) {
      *stream << "count=" << count_ << " curr=" << newest_;
      if (count_ > 1) *stream << "(all same)";
    } else {
      *stream << "count=" << count_ << " first=" << first_
              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
              << " avg=" << avg() << " std=" << std_deviation();
    }
  }

  // Streams the same summary as OutputToStream(). The parameter uses the
  // injected class name so the operator matches this exact instantiation,
  // including a non-default HighPrecisionValueType.
  friend std::ostream& operator<<(std::ostream& stream, const Stat& stat) {
    stat.OutputToStream(&stream);
    return stream;
  }

 private:
  ValueType first_ = 0;
  ValueType newest_ = 0;
  // lowest() rather than min(): for floating-point types min() is the smallest
  // positive value, which would be wrong when all samples are negative.
  ValueType max_ = std::numeric_limits<ValueType>::lowest();
  ValueType min_ = std::numeric_limits<ValueType>::max();
  int64_t count_ = 0;
  ValueType sum_ = 0;
  HighPrecisionValueType squared_sum_ = 0;
};
119 
// A StatsCalculator assists in performance analysis of Graph executions.
//
// It summarizes time spent executing (on GPU/CPU), memory used, etc. for
// graph execution.
//
// For example usage see StatSummarizer.
126 class StatsCalculator {
127  public:
128   enum SortingMetric {
129     BY_NAME,
130     BY_RUN_ORDER,
131     BY_TIME,
132     BY_MEMORY,
133     BY_TYPE,
134   };
135 
136   explicit StatsCalculator(const StatSummarizerOptions& options);
137 
138   // Returns a string detailing the accumulated runtime stats in a tab-separated
139   // format which can be pasted into a spreadsheet for further analysis.
140   std::string GetOutputString() const;
141 
142   std::string GetShortSummary() const;
143 
144   void ComputeStatsByType(
145       std::map<std::string, int64_t>* node_type_map_count,
146       std::map<std::string, int64_t>* node_type_map_time,
147       std::map<std::string, int64_t>* node_type_map_memory,
148       std::map<std::string, int64_t>* node_type_map_times_called,
149       int64_t* accumulated_us) const;
150 
151   std::string GetStatsByNodeType() const;
152 
153   std::string GetStatsByMetric(const std::string& title,
154                                SortingMetric sorting_metric,
155                                int num_stats) const;
156 
157   // Returns number of runs.
num_runs()158   int num_runs() const { return static_cast<int>(run_total_us_.count()); }
159 
160   // Returns stats of total microseconds spent by all nodes in each run.
run_total_us()161   const Stat<int64_t>& run_total_us() const { return run_total_us_; }
162 
UpdateRunTotalUs(int64_t run_total_us)163   void UpdateRunTotalUs(int64_t run_total_us) {
164     run_total_us_.UpdateStat(run_total_us);
165   }
166 
UpdateMemoryUsed(int64_t memory)167   void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); }
168 
169   struct Detail {
170     std::string name;
171     std::string type;
172     int64_t run_order;
173     Stat<int64_t> start_us;
174     Stat<int64_t> rel_end_us;
175     Stat<int64_t> mem_used;
176     int64_t times_called;
177   };
178 
GetDetails()179   const std::map<std::string, Detail>& GetDetails() const { return details_; }
180 
181   void AddNodeStats(const std::string& name, const std::string& type,
182                     int64_t run_order, int64_t start_us, int64_t rel_end_us,
183                     int64_t mem_used);
184 
185  private:
186   void OrderNodesByMetric(SortingMetric sorting_metric,
187                           std::vector<const Detail*>* details) const;
188 
189   std::string HeaderString(const std::string& title) const;
190   std::string ColumnString(const Detail& detail,
191                            const int64_t cumulative_stat_on_node,
192                            const Stat<int64_t>& stat) const;
193 
194   Stat<int64_t> run_total_us_;
195   Stat<int64_t> memory_;
196 
197   std::map<std::string, Detail> details_;
198   StatSummarizerOptions options_;
199 };
200 
201 }  // namespace tensorflow
202 
203 #endif  // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
204