1// This proto describes the format of the output profile file from
2// the TF-stats tool.
3syntax = "proto3";
4
5package tensorflow.profiler;
6
// Top-level container for the TF-stats output: holds one TfStatsTable
// computed with IDLE time and one computed without it.
message TfStatsDatabase {
  // Field numbers 1 through 3 are reserved and must not be reused.
  reserved 1 to 3;

  // Table whose records include IDLE time.
  TfStatsTable with_idle = 4;

  // Table whose records exclude IDLE time.
  TfStatsTable without_idle = 5;

  // Type of the device that was used.
  string device_type = 6;
}
17
// A collection of TfStatsRecord entries together with the keys of the
// associated pprof profiles.
message TfStatsTable {
  // The TfStats records; there is one per TF operation.
  repeated TfStatsRecord tf_stats_record = 1;

  // Key to the pprof profile covering host TF operations.
  string host_tf_pprof_key = 2;

  // Key to the pprof profile covering device TF operations.
  string device_tf_pprof_key = 3;
}
27
// Statistics for a single profiled TF operation. One TfStatsRecord exists
// per TF operation profiled.
message TfStatsRecord {
  // --- Identity ---------------------------------------------------------

  // Position of this TF-op in the ranking over all TF-ops.
  uint64 rank = 1;

  // Where this TF-op runs: "Host" or "Device".
  string host_or_device = 2;

  // Type of the TF-op.
  string op_type = 3;

  // Name of the TF-op.
  string op_name = 4;

  // How many times the operation occurred.
  int64 occurrences = 5;

  // --- Accumulated time (children included) -----------------------------

  // Total "accumulated" time, in micro-seconds, taken by the operation.
  // When the operation has children operations, the time spent inside
  // those children is counted as part of this value.
  double total_time_in_us = 6;

  // Average "accumulated" time, in micro-seconds, taken by each
  // occurrence of the operation.
  double avg_time_in_us = 7;

  // --- Self time (children excluded) ------------------------------------

  // Total "self" time, in micro-seconds, taken by the operation.
  // When the operation has children operations, the time spent inside
  // those children is NOT counted as part of this value.
  double total_self_time_in_us = 8;

  // Average "self" time, in micro-seconds, taken by the operation.
  double avg_self_time_in_us = 9;

  // --- Self time as a fraction of device / host totals ------------------

  // Total "self" time expressed as a fraction of the summed total
  // self-time of the operations run on the device.
  // The value is 0 when this op runs on the host.
  double device_total_self_time_as_fraction = 10;

  // Cumulative value of device_total_self_time_as_fraction.
  double device_cumulative_total_self_time_as_fraction = 11;

  // Total "self" time expressed as a fraction of the summed total
  // self-time of the operations run on the host.
  // The value is 0 when this op runs on the device.
  double host_total_self_time_as_fraction = 12;

  // Cumulative value of host_total_self_time_as_fraction.
  double host_cumulative_total_self_time_as_fraction = 13;

  // --- Roofline-related measurements ------------------------------------

  // Floating-point operations (FLOPs) performed per second.
  double measured_flop_rate = 14;

  // Bytes accessed per second, counting both reads and writes.
  double measured_memory_bw = 15;

  // Operational intensity, defined as FLOPs/bytes-accessed.
  double operational_intensity = 16;

  // Whether the Roofline Model classifies this operation as "Compute"
  // bound or "Memory" bound.
  string bound_by = 17;

  // --- Execution details ------------------------------------------------

  // True if this TF-op is eagerly executed.
  bool is_eager = 18;

  // Fraction of the kernel time that makes use of the GPU TensorCore.
  // The value is 0.0 when this op does not run on a GPU device.
  double gpu_tensorcore_utilization = 19;
}
80