1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/profiler/convert/op_stats_to_tf_stats.h"
17 
18 #include "tensorflow/core/platform/types.h"
19 #include "tensorflow/core/profiler/convert/op_metrics_to_record.h"
20 #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
21 #include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
22 #include "tensorflow/core/profiler/protobuf/tf_stats.pb.h"
23 #include "tensorflow/core/profiler/utils/kernel_stats_utils.h"
24 #include "tensorflow/core/profiler/utils/math_utils.h"
25 #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h"
26 #include "tensorflow/core/profiler/utils/time_utils.h"
27 
28 namespace tensorflow {
29 namespace profiler {
30 namespace {
31 
// Cap on how many TensorFlow ops are shown on the TensorFlow Stats page.
// The cap is applied to each side separately: up to 500 device-side ops and
// up to 500 host-side ops.
constexpr int kMaxNumOfOps = 500;
35 
ConvertOpMetricsToTfStatsRecord(bool on_device,const OpMetrics & metrics,double ridge_point_operational_intensity)36 TfStatsRecord ConvertOpMetricsToTfStatsRecord(
37     bool on_device, const OpMetrics& metrics,
38     double ridge_point_operational_intensity) {
39   TfStatsRecord record;
40   record.set_host_or_device(on_device ? "Device" : "Host");
41   record.set_is_eager(metrics.is_eager());
42   record.set_op_type(metrics.category());
43   record.set_op_name(metrics.name());
44   SetExecutionTimes(metrics, &record);
45   SetRooflineMetrics(metrics, ridge_point_operational_intensity, &record);
46   return record;
47 }
48 
GenerateTfStatsTable(const OpMetricsDb & host_tf_metrics_db,const OpMetricsDb & device_tf_metrics_db,const KernelStatsByOpName & kernel_stats_by_op_name,double ridge_point,bool exclude_idle)49 TfStatsTable GenerateTfStatsTable(
50     const OpMetricsDb& host_tf_metrics_db,
51     const OpMetricsDb& device_tf_metrics_db,
52     const KernelStatsByOpName& kernel_stats_by_op_name, double ridge_point,
53     bool exclude_idle) {
54   TfStatsTable tf_stats_table;
55   TfStatsRecord sentinel;
56   sentinel.set_rank(0);
57   sentinel.set_device_cumulative_total_self_time_as_fraction(0.0);
58   sentinel.set_host_cumulative_total_self_time_as_fraction(0.0);
59   const TfStatsRecord* prev_record = &sentinel;
60 
61   // Sets device-side TF stats.
62   uint64 total_device_time_ps = device_tf_metrics_db.total_time_ps();
63   if (exclude_idle) {
64     total_device_time_ps -= IdleTimePs(device_tf_metrics_db);
65   }
66   double total_device_time_us = PicosToMicros(total_device_time_ps);
67   for (const OpMetrics* metrics :
68        SortedOpMetricsDb(device_tf_metrics_db, kMaxNumOfOps)) {
69     if (exclude_idle && IsIdleOp(*metrics)) continue;
70     TfStatsRecord* record = tf_stats_table.add_tf_stats_record();
71     *record = ConvertOpMetricsToTfStatsRecord(
72         /*on_device=*/true, *metrics, ridge_point);
73     // Compute TensorCore utilization only on device side.
74     auto iter = kernel_stats_by_op_name.find(record->op_name());
75     if (iter != kernel_stats_by_op_name.end()) {
76       record->set_gpu_tensorcore_utilization(
77           SafeDivide(iter->second.tensor_core_duration_ns,
78                      iter->second.total_duration_ns));
79     } else {
80       record->set_gpu_tensorcore_utilization(0.0);
81     }
82     SetRankAndDeviceTimeFractions(total_device_time_us, *prev_record, record);
83     prev_record = record;
84   }
85 
86   // Sets host-side TF stats.
87   uint64 total_host_time_ps = host_tf_metrics_db.total_time_ps();
88   if (exclude_idle) {
89     total_host_time_ps -= IdleTimePs(host_tf_metrics_db);
90   }
91   double total_host_time_us = PicosToMicros(total_host_time_ps);
92   for (const OpMetrics* metrics : tensorflow::profiler::SortedOpMetricsDb(
93            host_tf_metrics_db, kMaxNumOfOps)) {
94     if (exclude_idle && IsIdleOp(*metrics)) continue;
95     TfStatsRecord* record = tf_stats_table.add_tf_stats_record();
96     *record = ConvertOpMetricsToTfStatsRecord(
97         /*on_device=*/false, *metrics, ridge_point);
98     // Host side TensorCore utilization is always 0.0
99     record->set_gpu_tensorcore_utilization(0.0);
100     SetRankAndHostTimeFractions(total_host_time_us, *prev_record, record);
101     prev_record = record;
102   }
103   return tf_stats_table;
104 }
105 
106 }  // namespace
107 
ConvertOpStatsToTfStats(const OpStats & op_stats)108 TfStatsDatabase ConvertOpStatsToTfStats(const OpStats& op_stats) {
109   const OpMetricsDb& host_tf_metrics_db = op_stats.host_op_metrics_db();
110   OpMetricsDb device_tf_metrics_db =
111       CreateTfMetricsDbFromDeviceOpMetricsDb(op_stats.device_op_metrics_db());
112   double ridge_point = op_stats.perf_env().ridge_point();
113   KernelStatsByOpName kernel_stats_by_op_name =
114       GroupKernelReportsByOpName(op_stats.kernel_stats_db());
115   TfStatsDatabase tf_stats_db;
116   *tf_stats_db.mutable_with_idle() = GenerateTfStatsTable(
117       host_tf_metrics_db, device_tf_metrics_db, kernel_stats_by_op_name,
118       ridge_point, /*exclude_idle=*/false);
119   *tf_stats_db.mutable_without_idle() = GenerateTfStatsTable(
120       host_tf_metrics_db, device_tf_metrics_db, kernel_stats_by_op_name,
121       ridge_point, /*exclude_idle=*/true);
122   tf_stats_db.set_device_type(op_stats.run_environment().device_type());
123   return tf_stats_db;
124 }
125 
126 }  // namespace profiler
127 }  // namespace tensorflow
128