// Schema of the output profile file produced by the TF-stats tool.
syntax = "proto3";

package tensorflow.profiler;

// Collection of TfStatsTables, together with the type of device used.
message TfStatsDatabase {
  // Field numbers 1, 2 and 3 are reserved and must not be assigned to
  // any field of this message.
  reserved 1, 2, 3;

  // Table whose statistics include IDLE time.
  TfStatsTable with_idle = 4;

  // Table whose statistics exclude IDLE time.
  TfStatsTable without_idle = 5;

  // Type of the device that was used.
  string device_type = 6;
}

// Table of TfStatsRecords, along with the keys of the corresponding
// pprof profiles.
message TfStatsTable {
  // Every TfStats record; there is one record per TF operation.
  repeated TfStatsRecord tf_stats_record = 1;

  // Key of the pprof profile for TF operations on the host.
  string host_tf_pprof_key = 2;

  // Key of the pprof profile for TF operations on the device.
  string device_tf_pprof_key = 3;
}

// Statistics of a single profiled TF operation; one TfStatsRecord exists
// for each of them.
message TfStatsRecord {
  // Position of this TF-op in the ranking of all TF-ops.
  uint64 rank = 1;

  // Where this TF-op is placed: "Host" or "Device".
  string host_or_device = 2;

  // Type of the TF-op.
  string op_type = 3;

  // Name of the TF-op.
  string op_name = 4;

  // How many times the operation occurred.
  int64 occurrences = 5;

  // Total "accumulated" time, in micro-seconds, taken by the operation.
  // When the operation has children operations, the time spent inside
  // those children is included in the "accumulated" time.
  double total_time_in_us = 6;

  // Average "accumulated" time, in micro-seconds, taken by each
  // occurrence of the operation.
  double avg_time_in_us = 7;

  // Total "self" time, in micro-seconds, taken by the operation.
  // When the operation has children operations, the time spent inside
  // those children is NOT included in the "self" time.
  double total_self_time_in_us = 8;

  // Average "self" time, in micro-seconds, taken by the operation.
  double avg_self_time_in_us = 9;

  // Total "self" time expressed as a fraction of the summed total
  // self-time of the operations run on the device.
  // The value is 0 when this op runs on the host.
  double device_total_self_time_as_fraction = 10;

  // Cumulative value of device_total_self_time_as_fraction.
  double device_cumulative_total_self_time_as_fraction = 11;

  // Total "self" time expressed as a fraction of the summed total
  // self-time of the operations run on the host.
  // The value is 0 when this op runs on the device.
  double host_total_self_time_as_fraction = 12;

  // Cumulative value of host_total_self_time_as_fraction.
  double host_cumulative_total_self_time_as_fraction = 13;

  // Floating-point operations (FLOPs) performed per second.
  double measured_flop_rate = 14;

  // Bytes accessed per second, counting both reads and writes.
  double measured_memory_bw = 15;

  // Operational intensity, defined as FLOPs/bytes-accessed.
  double operational_intensity = 16;

  // Roofline Model classification of this operation: "Compute" bound
  // or "Memory" bound.
  string bound_by = 17;

  // True if this TF-op is eagerly executed.
  bool is_eager = 18;

  // Fraction of the kernel time that makes use of the GPU TensorCore.
  // The value is 0.0 when this op does not run on a GPU device.
  double gpu_tensorcore_utilization = 19;
}