1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_
18 
19 #include <vector>
20 
21 #include "absl/strings/string_view.h"
22 #include "tensorflow/compiler/xla/types.h"
23 #include "tensorflow/core/platform/logging.h"
24 #include "tensorflow/core/platform/types.h"
25 
26 namespace xla {
27 
28 // HumanReadableProfileBuilder helps you create a textual profile of a
29 // computation, suitable for consumption by humans.
30 class HumanReadableProfileBuilder {
31  public:
HumanReadableProfileBuilder(absl::string_view computation_name,bool is_entry_computation,int64 total_cycles,double clock_rate_ghz)32   explicit HumanReadableProfileBuilder(absl::string_view computation_name,
33                                        bool is_entry_computation,
34                                        int64 total_cycles,
35                                        double clock_rate_ghz)
36       : computation_name_(computation_name),
37         is_entry_computation_(is_entry_computation),
38         total_cycles_(total_cycles),
39         clock_rate_ghz_(clock_rate_ghz) {
40     CHECK_GE(clock_rate_ghz, 1e-9);
41   }
42 
total_cycles()43   int64 total_cycles() const { return total_cycles_; }
44 
45   // Adds an operation to the profile.  If you don't know the number of
46   // floating-point ops or bytes touched by the op, or if you don't know how
47   // fast it would run optimally, pass -1 for that param.
AddOp(absl::string_view op_name,absl::string_view short_name,absl::string_view category,int64 cycles,int64 flop_count,int64 transcendental_count,int64 bytes_accessed,float optimal_seconds)48   void AddOp(absl::string_view op_name, absl::string_view short_name,
49              absl::string_view category, int64 cycles, int64 flop_count,
50              int64 transcendental_count, int64 bytes_accessed,
51              float optimal_seconds) {
52     op_infos_.push_back({string(op_name), string(short_name), string(category),
53                          cycles, flop_count, transcendental_count,
54                          bytes_accessed, optimal_seconds});
55   }
56 
57   // Gets the human-readable profile.
58   string ToString() const;
59 
60  private:
61   struct OpInfo {
62     string name;
63     string short_name;
64     string category;
65     int64 cycles;
66     int64 flop_count;  // -1 if unknown
67     int64 transcendental_count;
68     int64 bytes_accessed;   // -1 if unknown
69     float optimal_seconds;  // -1 if unknown
70   };
71 
CyclesToSeconds(int64 cycles)72   double CyclesToSeconds(int64 cycles) const {
73     return cycles / clock_rate_ghz_ / 1e9;
74   }
CyclesToMicroseconds(int64 cycles)75   double CyclesToMicroseconds(int64 cycles) const {
76     return cycles / clock_rate_ghz_ / 1000.0;
77   }
78 
79   string computation_name_;
80   bool is_entry_computation_;
81   int64 total_cycles_;
82   double clock_rate_ghz_;
83   std::vector<OpInfo> op_infos_;
84 };
85 
86 }  // namespace xla
87 
88 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_
89