1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
17 #define TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
18 
19 #include <stdlib.h>
20 
21 #include <algorithm>
22 #include <cmath>
23 #include <limits>
24 #include <map>
25 #include <sstream>
26 #include <string>
27 #include <vector>
28 
29 #include "tensorflow/core/util/stat_summarizer_options.h"
30 
31 namespace tensorflow {
32 
// Accumulates running summary statistics (first, newest, min, max, count, sum
// and sum of squares) over a sequence of samples of type ValueType.
//
// HighPrecisionValueType is the type used for the sum of squares and for the
// quotient computed by avg(); it defaults to double.
template <typename ValueType, typename HighPrecisionValueType = double>
class Stat {
 public:
  // Folds one more sample `v` into the running statistics.
  void UpdateStat(ValueType v) {
    if (count_ == 0) {
      first_ = v;
    }

    newest_ = v;
    max_ = std::max(v, max_);
    min_ = std::min(v, min_);
    ++count_;
    sum_ += v;
    // Widen one operand first so the square is evaluated in the
    // high-precision type instead of in ValueType.
    squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
  }

  // Restores the freshly constructed (empty) state by assigning from a
  // default-constructed Stat.
  void Reset() { *this = Stat(); }

  // True when no sample has been recorded since construction / Reset().
  bool empty() const { return count_ == 0; }

  // First sample recorded (0 while empty).
  ValueType first() const { return first_; }

  // Most recent sample recorded (0 while empty).
  ValueType newest() const { return newest_; }

  // Largest sample recorded. While empty this is the sentinel
  // numeric_limits<ValueType>::lowest().
  ValueType max() const { return max_; }

  // Smallest sample recorded. While empty this is the sentinel
  // numeric_limits<ValueType>::max().
  ValueType min() const { return min_; }

  // Number of samples recorded.
  int64_t count() const { return count_; }

  // Sum of all samples, accumulated in ValueType.
  ValueType sum() const { return sum_; }

  // Sum of the squares of all samples, accumulated in HighPrecisionValueType.
  HighPrecisionValueType squared_sum() const { return squared_sum_; }

  // True when the Stat is empty or every recorded sample compared equal.
  bool all_same() const { return (count_ == 0 || min_ == max_); }

  // Returns sum / count evaluated in HighPrecisionValueType.
  //
  // For an empty Stat this returns numeric_limits<ValueType>::quiet_NaN().
  // That is a NaN only when ValueType has one (floating-point types); for
  // integral ValueType the standard defines it as 0, so callers see 0.
  HighPrecisionValueType avg() const {
    return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
                   : static_cast<HighPrecisionValueType>(sum_) / count_;
  }

  // Returns the population standard deviation, sqrt(E[x^2] - E[x]^2),
  // converted to ValueType. Returns 0 when empty or when all samples are equal.
  ValueType std_deviation() const {
    if (all_same()) {
      return 0;
    }
    const HighPrecisionValueType mean = avg();
    const HighPrecisionValueType variance = squared_sum_ / count_ - mean * mean;
    // Rounding can push a tiny variance slightly below zero; taking the square
    // root of that would yield NaN, so treat it as zero spread.
    if (variance < 0) {
      return 0;
    }
    return static_cast<ValueType>(std::sqrt(variance));
  }

  // Writes a one-line, human-readable summary to `*stream`. The amount of
  // detail depends on whether the Stat is empty, constant, or varying.
  void OutputToStream(std::ostream* stream) const {
    if (empty()) {
      *stream << "count=0";
    } else if (all_same()) {
      *stream << "count=" << count_ << " curr=" << newest_;
      if (count_ > 1) *stream << "(all same)";
    } else {
      *stream << "count=" << count_ << " first=" << first_
              << " curr=" << newest_ << " min=" << min_ << " max=" << max_
              << " avg=" << avg() << " std=" << std_deviation();
    }
  }

  // Stream insertion; forwards to OutputToStream(). The parameter uses the
  // injected class name so it matches this exact specialization, including a
  // non-default HighPrecisionValueType.
  friend std::ostream& operator<<(std::ostream& stream, const Stat& stat) {
    stat.OutputToStream(&stream);
    return stream;
  }

 private:
  ValueType first_ = 0;
  ValueType newest_ = 0;
  // lowest() rather than min(): for floating-point types min() is the smallest
  // positive value, which would beat every negative sample.
  ValueType max_ = std::numeric_limits<ValueType>::lowest();
  ValueType min_ = std::numeric_limits<ValueType>::max();
  int64_t count_ = 0;
  ValueType sum_ = 0;
  HighPrecisionValueType squared_sum_ = 0;
};
106 
107 // A StatsCalculator assists in performance analysis of Graph executions.
108 //
109 // It summarizes time spent executing (on GPU/CPU), memory used etc for
110 // graph execution.
111 //
// For example usage see StatSummarizer.
113 class StatsCalculator {
114  public:
115   enum SortingMetric {
116     BY_NAME,
117     BY_RUN_ORDER,
118     BY_TIME,
119     BY_MEMORY,
120     BY_TYPE,
121   };
122 
123   explicit StatsCalculator(const StatSummarizerOptions& options);
124 
125   // Returns a string detailing the accumulated runtime stats in a tab-separated
126   // format which can be pasted into a spreadsheet for further analysis.
127   std::string GetOutputString() const;
128 
129   std::string GetShortSummary() const;
130 
131   void ComputeStatsByType(
132       std::map<std::string, int64_t>* node_type_map_count,
133       std::map<std::string, int64_t>* node_type_map_time,
134       std::map<std::string, int64_t>* node_type_map_memory,
135       std::map<std::string, int64_t>* node_type_map_times_called,
136       int64_t* accumulated_us) const;
137 
138   std::string GetStatsByNodeType() const;
139 
140   std::string GetStatsByMetric(const std::string& title,
141                                SortingMetric sorting_metric,
142                                int num_stats) const;
143 
144   // Returns number of runs.
num_runs()145   int num_runs() const { return static_cast<int>(run_total_us_.count()); }
146 
147   // Returns stats of total microseconds spent by all nodes in each run.
run_total_us()148   const Stat<int64_t>& run_total_us() const { return run_total_us_; }
149 
UpdateRunTotalUs(int64_t run_total_us)150   void UpdateRunTotalUs(int64_t run_total_us) {
151     run_total_us_.UpdateStat(run_total_us);
152   }
153 
UpdateMemoryUsed(int64_t memory)154   void UpdateMemoryUsed(int64_t memory) { memory_.UpdateStat(memory); }
155 
156   struct Detail {
157     std::string name;
158     std::string type;
159     int64_t run_order;
160     Stat<int64_t> start_us;
161     Stat<int64_t> rel_end_us;
162     Stat<int64_t> mem_used;
163     int64_t times_called;
164   };
165 
GetDetails()166   const std::map<std::string, Detail>& GetDetails() const { return details_; }
167 
168   void AddNodeStats(const std::string& name, const std::string& type,
169                     int64_t run_order, int64_t start_us, int64_t rel_end_us,
170                     int64_t mem_used);
171 
172  private:
173   void OrderNodesByMetric(SortingMetric sorting_metric,
174                           std::vector<const Detail*>* details) const;
175 
176   std::string HeaderString(const std::string& title) const;
177   std::string ColumnString(const Detail& detail,
178                            const int64_t cumulative_stat_on_node,
179                            const Stat<int64_t>& stat) const;
180 
181   Stat<int64_t> run_total_us_;
182   Stat<int64_t> memory_;
183 
184   std::map<std::string, Detail> details_;
185   StatSummarizerOptions options_;
186 };
187 
188 }  // namespace tensorflow
189 
190 #endif  // TENSORFLOW_CORE_UTIL_STATS_CALCULATOR_H_
191