1syntax = "proto3";
2
3package tensorflow.profiler;
4
5import "google/protobuf/any.proto";
6import "tensorflow/core/profiler/protobuf/diagnostics.proto";
7
// Bottleneck analysis for generic hardware: the split of step time into
// input, output, idle and compute, plus a classification and a
// human-readable statement for each overhead category.
message BottleneckAnalysis {
  // --- Step-time split (percentages of step time) ---

  // Percentage of step time spent on input.
  double input_percent = 7;

  // Percentage of step time spent on output.
  double output_percent = 8;

  // Percentage of step time spent idle for reasons that are not I/O-related.
  double idle_percent = 9;

  // Percentage of step time spent on compute.
  double compute_percent = 10;

  // --- Input ---

  // Whether input is a bottleneck.
  // Possible values: "host", "device", "both", or "unknown".
  string input_classification = 1;

  // Human-readable description of the input bottleneck.
  string input_statement = 2;

  // --- Kernel launch ---

  // Whether kernel launching is a bottleneck.
  // Possible values: "no", "moderate", "high".
  string kernel_launch_classification = 3;

  // Human-readable description of the kernel-launch overhead.
  string kernel_launch_statement = 4;

  // --- All other ---

  // Whether the "all other" overhead is a bottleneck.
  // Possible values: "no", "moderate", "high".
  string all_other_classification = 5;

  // Human-readable description of the "all other" overhead.
  string all_other_statement = 6;

  // --- Device collectives ---

  // Whether device collective communication is a bottleneck.
  // Possible values: "no", "moderate", "high".
  string device_collectives_classification = 11;

  // Human-readable description of the device collective communication
  // overhead.
  string device_collectives_statement = 12;
}
40
// Summary statistics over a set of measurements. Used for both step duration
// and Op duration. The unit is that of the field holding the summary (in this
// file: milliseconds or a percentage of step duration).
message StepSummary {
  // Average of the summarized values.
  double average = 1;

  // Standard deviation of the summarized values.
  double standard_deviation = 2;

  // Smallest of the summarized values.
  double minimum = 3;

  // Largest of the summarized values.
  double maximum = 4;
}
48
// Details of a single step on generic hardware. All times are in
// milliseconds.
message PerGenericStepDetails {
  // Field number 4 is reserved and cannot be assigned to a field.
  reserved 4;

  // Step number of this step.
  int32 step_number = 1;

  // Step time (in ms).
  double step_time_ms = 2;

  // ---- Breakdown of the step time into event categories ----

  // Unknown time (in ms).
  double unknown_time_ms = 3;

  // Time (in ms) the host spends waiting for input data to be ready.
  double host_wait_input_ms = 11;

  // Time (in ms) the host spends sending input data to the device.
  // Total input time = host_wait_input_ms + host_to_device_ms.
  double host_to_device_ms = 12;

  // Output time (in ms).
  double output_ms = 5;

  // Device-compute time (in ms).
  double device_compute_ms = 6;

  // Device-to-device communication time (in ms).
  double device_to_device_ms = 7;

  // Device time spent on collective communications (in ms).
  double device_collectives_ms = 13;

  // Host-compute time (in ms).
  double host_compute_ms = 8;

  // Host-prepare time (in ms).
  double host_prepare_ms = 9;

  // Time spent on compiling (in ms).
  double host_compile_ms = 10;
}
79
// Breakdown of the input processing time. All values are in microseconds.
message InputTimeBreakdown {
  // Time spent on demanded file reads, in microseconds.
  double demanded_file_read_us = 1;

  // Time spent on advanced file reads, in microseconds.
  double advanced_file_read_us = 2;

  // Time spent on data preprocessing, in microseconds.
  double preprocessing_us = 3;

  // Infeed enqueue time, in microseconds.
  double enqueue_us = 4;

  // Non-enqueue input time that cannot be broken down any further. This
  // entry covers the situation where the input pipeline is not instrumented.
  double unclassified_non_enqueue_us = 5;
}
94
// Details of one input Op, accumulated over all of its occurrences.
message InputOpDetails {
  // Name of the Op.
  string op_name = 1;

  // Number of occurrences.
  uint64 count = 2;

  // Time in milliseconds, accumulated over all occurrences.
  double time_in_ms = 3;

  // Time accumulated over all occurrences, as a percentage of the total
  // input processing time.
  double time_in_percent = 4;

  // Self time in milliseconds, accumulated over all occurrences.
  double self_time_in_ms = 5;

  // Self time accumulated over all occurrences, as a percentage of the total
  // input processing time.
  double self_time_in_percent = 6;

  // Category of the Op. Possible categories: "Enqueue",
  // "Advanced file read", "Demanded file read", "Preprocessing", "Unknown".
  string category = 7;
}
114
// Recommendation to users produced by the input-pipeline analysis.
message InputPipelineAnalysisRecommendation {
  // List of detailed recommendations.
  repeated string details = 1;

  // Analysis of the different types of bottlenecks.
  // Can be unpacked into a BottleneckAnalysis.
  google.protobuf.Any bottleneck_analysis = 2;

  // Suggested step to take next.
  string summary_next_step = 3;
}
124
// Breakdown of the step time on generic hardware. Each field summarizes, in
// milliseconds, one category of time as a part of the step.
message GenericStepTimeBreakdown {
  // Field number 2 is reserved and cannot be assigned to a field.
  reserved 2;

  // Summary of the unknown time (ms).
  StepSummary unknown_time_ms_summary = 1;

  // Summary of the host-wait-input time (ms).
  StepSummary host_wait_input_ms_summary = 9;

  // Summary of the host-to-device time (ms).
  StepSummary host_to_device_ms_summary = 10;

  // Summary of the input time (ms).
  StepSummary input_ms_summary = 11;

  // Summary of the output time (ms).
  StepSummary output_ms_summary = 3;

  // Summary of the device-compute time (ms).
  StepSummary device_compute_ms_summary = 4;

  // Summary of the device-to-device time (ms).
  StepSummary device_to_device_ms_summary = 5;

  // Summary of the device-collectives time (ms).
  StepSummary device_collectives_ms_summary = 12;

  // Summary of the host-compute time (ms).
  StepSummary host_compute_ms_summary = 6;

  // Summary of the host-prepare time (ms).
  StepSummary host_prepare_ms_summary = 7;

  // Summary of the compilation time (ms).
  StepSummary host_compile_ms_summary = 8;
}
150
// Result of the input-pipeline analysis: step-time statistics, per-step
// details, input-time and input-Op breakdowns, a recommendation, and
// diagnostics.
message InputPipelineAnalysisResult {
  // Field numbers 1 and 10 are reserved and cannot be assigned to a field.
  reserved 1, 10;

  // Hardware type.
  string hardware_type = 9;

  // Summary of step duration across all cores.
  StepSummary step_time_summary = 2;

  // Summary of input-related stall, as a percentage of step duration.
  StepSummary input_percent_summary = 3;

  // Percentage of step time spent waiting for input.
  double input_percent = 11;

  // Percentage of step time spent doing output.
  double output_percent = 13;

  // Percentage of step time spent idle for reasons that are not I/O-related.
  double idle_percent = 14;

  // Percentage of step time spent doing compute.
  double compute_percent = 15;

  // Details of each step.
  // Each entry can be unpacked into a PerGenericStepDetails.
  repeated google.protobuf.Any step_details = 4;

  // Breakdown of the input processing time.
  InputTimeBreakdown input_time_breakdown = 5;

  // Details of each input Op executed.
  repeated InputOpDetails input_op_details = 6;

  // Recommendation for next steps to users.
  InputPipelineAnalysisRecommendation recommendation = 7;

  // Breakdown of the step time.
  // Can be unpacked into a GenericStepTimeBreakdown.
  google.protobuf.Any step_time_breakdown = 8;

  // Error and warning messages for diagnosing profiling issues.
  Diagnostics diagnostics = 12;
}
181