1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PROFILER_UTILS_KERNEL_STATS_UTILS_H_
17 #define TENSORFLOW_CORE_PROFILER_UTILS_KERNEL_STATS_UTILS_H_
18 
19 #include <vector>
20 
21 #include "absl/container/flat_hash_map.h"
22 #include "absl/strings/string_view.h"
23 #include "tensorflow/core/platform/types.h"
24 #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
25 
26 namespace tensorflow {
27 namespace profiler {
28 
29 // Populates kernel launch information from a kKernelDetails XStat.
30 void ParseKernelLaunchParams(absl::string_view xstat_kernel_details,
31                              KernelReport* kernel);
32 
33 // Returns true if kernel uses TensorCores.
34 bool IsKernelUsingTensorCore(absl::string_view kernel_name);
35 
36 // Returns true if operation is eligible to use TensorCores.
37 bool IsOpTensorCoreEligible(absl::string_view tf_op_name);
38 
39 // Returns true if Einsum equation is eligible to use TensorCores.
40 bool IsEinsumTensorCoreEligible(absl::string_view equation);
41 
42 // Less than comparator for Kernel Reports.
43 struct KernelReportLessThanComparator {
44   bool operator()(const KernelReport& lhs, const KernelReport& rhs) const;
45 };
46 
47 // Equal to comparator for Kernel Reports.
48 struct KernelReportEqualToComparator {
49   bool operator()(const KernelReport& lhs, const KernelReport& rhs) const;
50 };
51 
52 // Sorts kernel reorts by total duration descendingly.
53 // Keeps only the top kernel reports with long kernel duration in the given
54 // KernelStatsDb. Kernel reports with shorter kernel duration are dropped.
55 void SortAndKeepTopKDurationKernelReportsInDb(KernelStatsDb* kernel_stats_db);
56 
57 struct KernelReportValue {
58   uint64 total_duration_ns = 0;
59   uint64 min_duration_ns = 0;
60   uint64 max_duration_ns = 0;
61   uint64 occurrences = 0;
62 };
63 
64 struct KernelKeyWrap {
65   const KernelReport* key;
66   template <typename H>
AbslHashValueKernelKeyWrap67   friend H AbslHashValue(H h, KernelKeyWrap wrap) {
68     // Kernel reports are grouped by these fields, hence they are used as
69     // hashing criteria.
70     // clang-format off
71     return H::combine(
72         std::move(h),
73         wrap.key->is_kernel_using_tensor_core(),
74         wrap.key->is_op_tensor_core_eligible(),
75         wrap.key->block_dim(0),
76         wrap.key->block_dim(1),
77         wrap.key->block_dim(2),
78         wrap.key->grid_dim(0),
79         wrap.key->grid_dim(1),
80         wrap.key->grid_dim(2),
81         wrap.key->registers_per_thread(),
82         wrap.key->static_shmem_bytes(),
83         wrap.key->dynamic_shmem_bytes(),
84         wrap.key->name(),
85         wrap.key->op_name());
86     // clang-format on
87   }
88 };
89 
90 struct KernelHash {
operatorKernelHash91   size_t operator()(const KernelReport& key) const {
92     return absl::Hash<KernelKeyWrap>()(KernelKeyWrap{&key});
93   }
94 };
95 
96 using KernelReportMap =
97     absl::flat_hash_map<KernelReport, KernelReportValue, KernelHash,
98                         KernelReportEqualToComparator>;
99 
100 // Copies the top kernel reports with long kernel duration into the given
101 // KernelStatsDb.
102 void CopyTopKDurationKernelReportsToDb(const KernelReportMap& reports,
103                                        KernelStatsDb* dst);
104 
105 // Inserts or aggregates KernelReports into the given KernelReportMap.
106 void InsertOrUpdateKernelReport(const KernelReport& kernel,
107                                 const KernelReportValue& value,
108                                 KernelReportMap* dst);
109 
110 // Aggregates values from one KernelReportMap into another.
111 void MergeKernelReports(const KernelReportMap& reports, KernelReportMap* dst);
112 
113 // Kernel stats aggregated at TF operation level.
114 struct OpLevelKernelStats {
115   // Whether op is eligible to use TensorCore.
116   bool is_op_tensor_core_eligible = false;
117   // The accumulated duration of all the kernels launched in this op.
118   uint64 total_duration_ns = 0;
119   // The accumulated duration of all the kernels using TensorCore in this op.
120   // If this value is not 0, at least one of the kernels launched by this op
121   // is using TensorCore.
122   uint64 tensor_core_duration_ns = 0;
123 };
124 
125 using KernelStatsByOpName =
126     absl::flat_hash_map<absl::string_view, OpLevelKernelStats>;
127 
128 // Groups KernelReport in <kernel_stats_db> by tensorflow operation name.
129 KernelStatsByOpName GroupKernelReportsByOpName(
130     const KernelStatsDb& kernel_stats_db);
131 
132 }  // namespace profiler
133 }  // namespace tensorflow
134 
135 #endif  // TENSORFLOW_CORE_PROFILER_UTILS_KERNEL_STATS_UTILS_H_
136