syntax = "proto3";

package tensorflow.profiler;

import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/diagnostics.proto";

// Generic hardware bottleneck.
message BottleneckAnalysis {
  // Percentage of step time that is spent on input.
  double input_percent = 7;
  // Percentage of step time that is spent on output.
  double output_percent = 8;
  // Percentage of step time that is idle for non-I/O-related reasons.
  double idle_percent = 9;
  // Percentage of step time that is spent on compute.
  double compute_percent = 10;
  // Indicates if input is a bottleneck. Possible values: "host", "device",
  // "both", or "unknown".
  string input_classification = 1;
  // A human-readable description of the input bottleneck.
  string input_statement = 2;
  // Indicates if kernel launching is a bottleneck. Possible values: "no",
  // "moderate", "high".
  string kernel_launch_classification = 3;
  // A human-readable description of the kernel launching overhead.
  string kernel_launch_statement = 4;
  // Indicates if the "all other" overhead is a bottleneck. Possible values:
  // "no", "moderate", "high".
  string all_other_classification = 5;
  // A human-readable description of the "all other" overhead.
  string all_other_statement = 6;
  // Indicates if device collective communication is a bottleneck. Possible
  // values: "no", "moderate", "high".
  string device_collectives_classification = 11;
  // A human-readable description of the device collective communication
  // overhead.
  string device_collectives_statement = 12;
}

// Summary statistics of a measured quantity. Used for both step duration and
// Op duration. The unit is that of the quantity being summarized (for
// example, the fields in this file use it for values in ms and for
// percentages).
message StepSummary {
  // Average of the summarized values.
  double average = 1;
  // Standard deviation of the summarized values.
  double standard_deviation = 2;
  // Minimum of the summarized values.
  double minimum = 3;
  // Maximum of the summarized values.
  double maximum = 4;
}

// Per-step details on generic hardware.
message PerGenericStepDetails {
  // The step number of a step.
  int32 step_number = 1;
  // The step time (in ms).
  double step_time_ms = 2;

  // Breakdown of the step time in different event categories.

  // The unknown time (in ms).
  double unknown_time_ms = 3;
  // The time (in ms) in which the host is waiting for input data to be ready.
  double host_wait_input_ms = 11;
  // The time (in ms) in which the host is sending input data to the device.
  // Total input time = host_wait_input_ms + host_to_device_ms.
  double host_to_device_ms = 12;
  // The output time (in ms).
  double output_ms = 5;
  // The device-compute time (in ms).
  double device_compute_ms = 6;
  // The device-to-device communication time (in ms).
  double device_to_device_ms = 7;
  // The device time spent on collective communications (in ms).
  double device_collectives_ms = 13;
  // The host-compute time (in ms).
  double host_compute_ms = 8;
  // The host-prepare time (in ms).
  double host_prepare_ms = 9;
  // The time spent on compiling (in ms).
  double host_compile_ms = 10;

  // Field number of a removed field; it must not be reused.
  reserved 4;
}

// Breakdown of the input processing time into categories. All values are in
// microseconds.
message InputTimeBreakdown {
  // Time spent on demanded file read in microseconds.
  double demanded_file_read_us = 1;
  // Time spent on advanced file read in microseconds.
  double advanced_file_read_us = 2;
  // Time spent on data preprocessing in microseconds.
  double preprocessing_us = 3;
  // The infeed enqueue time in microseconds.
  double enqueue_us = 4;
  // This entry is for the situation where we can't further
  // break down the non-enqueue input time (because the input pipeline
  // is not instrumented).
  double unclassified_non_enqueue_us = 5;
}

// Details of a single input Op, accumulated over all of its occurrences.
message InputOpDetails {
  // The Op's name.
  string op_name = 1;
  // The number of occurrences.
  uint64 count = 2;
  // Time (accumulated over all occurrences) in milliseconds.
  double time_in_ms = 3;
  // Time (accumulated over all occurrences) in
  // percentage of the total input processing time.
  double time_in_percent = 4;
  // Self time (accumulated over all occurrences) in milliseconds.
  double self_time_in_ms = 5;
  // Self time (accumulated over all occurrences) in
  // percentage of the total input processing time.
  double self_time_in_percent = 6;
  // Possible categories: "Enqueue", "Advanced file read",
  // "Demanded file read", "Preprocessing", "Unknown".
  string category = 7;
}

// Recommendation produced by the input pipeline analysis.
message InputPipelineAnalysisRecommendation {
  // A list of detailed recommendations.
  repeated string details = 1;
  // An analysis of different types of bottlenecks. Can be unpacked into a
  // BottleneckAnalysis.
  google.protobuf.Any bottleneck_analysis = 2;
  // A suggested step to take next.
  string summary_next_step = 3;
}

// Per-category summaries of the step time on generic hardware. Every summary
// is expressed in ms.
message GenericStepTimeBreakdown {
  // Summary of all unknown time as a part of step in ms.
  StepSummary unknown_time_ms_summary = 1;
  // Summary of all host-wait-input time as a part of step in ms.
  StepSummary host_wait_input_ms_summary = 9;
  // Summary of all host-to-device time as a part of step in ms.
  StepSummary host_to_device_ms_summary = 10;
  // Summary of all input time as a part of step in ms.
  StepSummary input_ms_summary = 11;
  // Summary of all output time as a part of step in ms.
  StepSummary output_ms_summary = 3;
  // Summary of all device-compute time as a part of step in ms.
  StepSummary device_compute_ms_summary = 4;
  // Summary of all device-to-device time as a part of step in ms.
  StepSummary device_to_device_ms_summary = 5;
  // Summary of all device-collectives time as a part of step in ms.
  StepSummary device_collectives_ms_summary = 12;
  // Summary of all host-compute time as a part of step in ms.
  StepSummary host_compute_ms_summary = 6;
  // Summary of all host-prepare time as a part of step in ms.
  StepSummary host_prepare_ms_summary = 7;
  // Summary of all compilation time as a part of step in ms.
  StepSummary host_compile_ms_summary = 8;

  // Field number of a removed field; it must not be reused.
  reserved 2;
}

// Top-level result of the input pipeline analysis.
message InputPipelineAnalysisResult {
  // Hardware type.
  string hardware_type = 9;
  // Summary of all step duration across all cores.
  StepSummary step_time_summary = 2;
  // Summary of all input-related stall as percentage of step duration.
  StepSummary input_percent_summary = 3;
  // Percentage of step time that is waiting for input.
  double input_percent = 11;
  // Percentage of step time that is doing output.
  double output_percent = 13;
  // Percentage of step time that is idle for non-I/O-related reasons.
  double idle_percent = 14;
  // Percentage of step time that is doing compute.
  double compute_percent = 15;
  // Details of each step. Can be unpacked into a PerGenericStepDetails.
  repeated google.protobuf.Any step_details = 4;
  // The breakdown of the input processing time.
  InputTimeBreakdown input_time_breakdown = 5;
  // Details of each input Op executed.
  repeated InputOpDetails input_op_details = 6;
  // Recommendation for next steps to users.
  InputPipelineAnalysisRecommendation recommendation = 7;
  // Breakdown of the step time. Can be unpacked into a
  // GenericStepTimeBreakdown.
  google.protobuf.Any step_time_breakdown = 8;
  // Error and warning messages for diagnosing profiling issues.
  Diagnostics diagnostics = 12;

  // Field numbers of removed fields; they must not be reused.
  reserved 1, 10;
}