1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PROFILER_UTILS_KERNEL_STATS_UTILS_H_ 17 #define TENSORFLOW_CORE_PROFILER_UTILS_KERNEL_STATS_UTILS_H_ 18 19 #include <vector> 20 21 #include "absl/container/flat_hash_map.h" 22 #include "absl/strings/string_view.h" 23 #include "tensorflow/core/platform/types.h" 24 #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" 25 26 namespace tensorflow { 27 namespace profiler { 28 29 // Populates kernel launch information from a kKernelDetails XStat. 30 void ParseKernelLaunchParams(absl::string_view xstat_kernel_details, 31 KernelReport* kernel); 32 33 // Returns true if kernel uses TensorCores. 34 bool IsKernelUsingTensorCore(absl::string_view kernel_name); 35 36 // Returns true if operation is eligible to use TensorCores. 37 bool IsOpTensorCoreEligible(absl::string_view tf_op_name); 38 39 // Returns true if Einsum equation is eligible to use TensorCores. 40 bool IsEinsumTensorCoreEligible(absl::string_view equation); 41 42 // Less than comparator for Kernel Reports. 43 struct KernelReportLessThanComparator { 44 bool operator()(const KernelReport& lhs, const KernelReport& rhs) const; 45 }; 46 47 // Equal to comparator for Kernel Reports. 48 struct KernelReportEqualToComparator { 49 bool operator()(const KernelReport& lhs, const KernelReport& rhs) const; 50 }; 51 52 // Sorts kernel reorts by total duration descendingly. 53 // Keeps only the top kernel reports with long kernel duration in the given 54 // KernelStatsDb. Kernel reports with shorter kernel duration are dropped. 55 void SortAndKeepTopKDurationKernelReportsInDb(KernelStatsDb* kernel_stats_db); 56 57 struct KernelReportValue { 58 uint64 total_duration_ns = 0; 59 uint64 min_duration_ns = 0; 60 uint64 max_duration_ns = 0; 61 uint64 occurrences = 0; 62 }; 63 64 struct KernelKeyWrap { 65 const KernelReport* key; 66 template <typename H> AbslHashValueKernelKeyWrap67 friend H AbslHashValue(H h, KernelKeyWrap wrap) { 68 // Kernel reports are grouped by these fields, hence they are used as 69 // hashing criteria. 70 // clang-format off 71 return H::combine( 72 std::move(h), 73 wrap.key->is_kernel_using_tensor_core(), 74 wrap.key->is_op_tensor_core_eligible(), 75 wrap.key->block_dim(0), 76 wrap.key->block_dim(1), 77 wrap.key->block_dim(2), 78 wrap.key->grid_dim(0), 79 wrap.key->grid_dim(1), 80 wrap.key->grid_dim(2), 81 wrap.key->registers_per_thread(), 82 wrap.key->static_shmem_bytes(), 83 wrap.key->dynamic_shmem_bytes(), 84 wrap.key->name(), 85 wrap.key->op_name()); 86 // clang-format on 87 } 88 }; 89 90 struct KernelHash { operatorKernelHash91 size_t operator()(const KernelReport& key) const { 92 return absl::Hash<KernelKeyWrap>()(KernelKeyWrap{&key}); 93 } 94 }; 95 96 using KernelReportMap = 97 absl::flat_hash_map<KernelReport, KernelReportValue, KernelHash, 98 KernelReportEqualToComparator>; 99 100 // Copies the top kernel reports with long kernel duration into the given 101 // KernelStatsDb. 102 void CopyTopKDurationKernelReportsToDb(const KernelReportMap& reports, 103 KernelStatsDb* dst); 104 105 // Inserts or aggregates KernelReports into the given KernelReportMap. 106 void InsertOrUpdateKernelReport(const KernelReport& kernel, 107 const KernelReportValue& value, 108 KernelReportMap* dst); 109 110 // Aggregates values from one KernelReportMap into another. 111 void MergeKernelReports(const KernelReportMap& reports, KernelReportMap* dst); 112 113 // Kernel stats aggregated at TF operation level. 114 struct OpLevelKernelStats { 115 // Whether op is eligible to use TensorCore. 116 bool is_op_tensor_core_eligible = false; 117 // The accumulated duration of all the kernels launched in this op. 118 uint64 total_duration_ns = 0; 119 // The accumulated duration of all the kernels using TensorCore in this op. 120 // If this value is not 0, at least one of the kernels launched by this op 121 // is using TensorCore. 122 uint64 tensor_core_duration_ns = 0; 123 }; 124 125 using KernelStatsByOpName = 126 absl::flat_hash_map<absl::string_view, OpLevelKernelStats>; 127 128 // Groups KernelReport in <kernel_stats_db> by tensorflow operation name. 129 KernelStatsByOpName GroupKernelReportsByOpName( 130 const KernelStatsDb& kernel_stats_db); 131 132 } // namespace profiler 133 } // namespace tensorflow 134 135 #endif // TENSORFLOW_CORE_PROFILER_UTILS_KERNEL_STATS_UTILS_H_ 136