syntax = "proto3";

package tensorflow;

import "tensorflow/core/framework/tensor.proto";
import "tensorflow/core/protobuf/graph_debug_info.proto";

option cc_enable_arenas = true;
option java_outer_classname = "DebugEventProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.util";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto";

// Available modes for extracting debugging information from a Tensor.
// TODO(cais): Document the detailed column names and semantics in a separate
// markdown file once the implementation settles.
enum TensorDebugMode {
  // Zero value, i.e., what a field of this type holds when no mode is set.
  UNSPECIFIED = 0;

  // Only records what tensors are computed, eagerly or in graphs.
  // No information regarding the value of the tensor is available.
  NO_TENSOR = 1;

  // A minimalist health summary for float-type tensors.
  // Contains information only about the presence/absence of pathological
  // values including Infinity and NaN.
  // Applicable only to float dtypes.
  CURT_HEALTH = 2;

  // A concise health summary for float-type tensors.
  // Contains more information than `CURT_HEALTH`.
  // Infinity and NaN are treated differently.
  // Applicable only to float and integer dtypes.
  CONCISE_HEALTH = 3;

  // A detailed health summary.
  // Contains further detailed information than `CONCISE_HEALTH`.
  // Information about device, dtype and shape are included.
  // Counts for various types of values (Infinity, NaN, negative, zero,
  // positive) are included.
  // Applicable to float, integer and boolean dtypes.
  FULL_HEALTH = 4;

  // Provides full runtime shape information, up to a maximum rank, beyond
  // which the dimension sizes are truncated.
  SHAPE = 5;

  // Full numeric summary.
  // Including device, dtype, shape, counts of various types of values
  // (Infinity, NaN, negative, zero, positive), and summary statistics
  // (minimum, maximum, mean and variance).
  // Applicable to float, integer and boolean dtypes.
  FULL_NUMERICS = 6;

  // Full tensor value.
  FULL_TENSOR = 7;

  // Reduce the elements of a tensor to a rank-1 tensor of shape [3], in which
  // - the 1st element is -inf if any element of the tensor is -inf,
  //   or zero otherwise.
  // - the 2nd element is +inf if any element of the tensor is +inf,
  //   or zero otherwise.
  // - the 3rd element is nan if any element of the tensor is nan, or zero
  //   otherwise.
  REDUCE_INF_NAN_THREE_SLOTS = 8;
}

// An Event related to the debugging of a TensorFlow program.
// Each event carries a timestamp, an optional step, and exactly one payload
// selected through the `what` oneof.
message DebugEvent {
  // Timestamp in seconds (with microsecond precision).
  double wall_time = 1;

  // Step of training (if available).
  int64 step = 2;

  // The payload of this event. At most one of the members below is set.
  oneof what {
    // Metadata related to this debugging data.
    DebugMetadata debug_metadata = 3;

    // The content of a source file.
    SourceFile source_file = 4;

    // NOTE(review): field number 5 is not used anywhere in this message;
    // presumably it belonged to a removed field -- confirm before assigning it.

    // A stack frame (filename, line number and column number, function name and
    // code string) with ID.
    StackFrameWithId stack_frame_with_id = 6;

    // The creation of an op within a graph (e.g., a FuncGraph compiled from
    // a Python function).
    GraphOpCreation graph_op_creation = 7;

    // Information about a debugged graph.
    DebuggedGraph debugged_graph = 8;

    // Execution of an op or a Graph (e.g., a tf.function).
    Execution execution = 9;

    // A graph execution trace: Contains information about the intermediate
    // tensors computed during the graph execution.
    GraphExecutionTrace graph_execution_trace = 10;

    // The ID of the graph (i.e., FuncGraph) executed here: applicable only
    // to the execution of a FuncGraph.
    string graph_id = 11;

    // A device on which debugger-instrumented ops and/or tensors reside.
    DebuggedDevice debugged_device = 12;
  }
}

// Metadata about the debugger and the debugged TensorFlow program.
message DebugMetadata {
  // Version of TensorFlow.
  string tensorflow_version = 1;

  // Version of the DebugEvent file format.
  // Has a format of "debug.Event:<number>", e.g., "debug.Event:1".
  string file_version = 2;

  // A unique ID for the current run of tfdbg.
  // A run of tfdbg is defined as a TensorFlow job instrumented by tfdbg.
  // Multiple hosts in a distributed TensorFlow job instrumented by tfdbg
  // have the same ID.
  string tfdbg_run_id = 3;
}

// Content of a source file involved in the execution of the debugged TensorFlow
// program.
message SourceFile {
  // Path to the file.
  string file_path = 1;

  // Name of the host on which the file is located.
  string host_name = 2;

  // Line-by-line content of the file, one entry per line.
  repeated string lines = 3;
}

// A stack frame with ID.
message StackFrameWithId {
  // A unique ID for the stack frame: A UUID-like string.
  string id = 1;

  // Stack frame, i.e., a frame of a stack trace, containing information
  // regarding the file name, line number, function name, code content
  // of the line, and column number (if available).
  GraphDebugInfo.FileLineCol file_line_col = 2;
}

// Code location information: A stack trace with host-name information.
// Instead of encoding the detailed stack trace, this proto refers to IDs of
// stack frames stored as `StackFrameWithId` protos.
message CodeLocation {
  // Host name on which the source files are located.
  string host_name = 1;

  // IDs of the stack frames that make up the stack trace. Each entry is the
  // unique ID of one stack frame. The ordering of the frames is consistent
  // with Python's `traceback.extract_tb()`.
  repeated string stack_frame_ids = 2;
}

// The creation of an op in a TensorFlow Graph (e.g., FuncGraph in TF2).
message GraphOpCreation {
  // Type of the op (e.g., "MatMul").
  string op_type = 1;

  // Name of the op (e.g., "Dense/MatMul_1").
  string op_name = 2;

  // Name of the graph that the op is a part of (if available).
  string graph_name = 3;

  // Unique ID of the graph (generated by debugger).
  // This is the ID of the immediately-enclosing graph.
  string graph_id = 4;

  // Name of the device that the op is assigned to (if available).
  string device_name = 5;

  // Names of the input tensors to the op.
  repeated string input_names = 6;

  // Number of output tensors emitted by the op.
  int32 num_outputs = 7;

  // Code location (stack trace) of the op's creation.
  CodeLocation code_location = 8;

  // Unique IDs for the output tensors of this op.
  // NOTE(review): declared as int32 here, whereas the tensor-ID fields of
  // `Execution` are int64 -- confirm the narrower type is intended.
  repeated int32 output_tensor_ids = 9;
}

// A debugger-instrumented graph.
message DebuggedGraph {
  // An ID for the graph.
  // This can be used to look up graph names. Generated by the debugger.
  string graph_id = 1;

  // Name of the graph (if available).
  string graph_name = 2;

  // Names of the instrumented ops. This can be used to look up op name
  // based on the numeric-summary tensors (2nd column).
  repeated string instrumented_ops = 3;

  // Original (uninstrumented) GraphDef (if available).
  bytes original_graph_def = 4;

  // An encoded version of a GraphDef.
  // This graph may include the debugger-inserted ops.
  bytes instrumented_graph_def = 5;

  // ID of the immediately enclosing context (graph), if any.
  string outer_context_id = 6;
}

// A device on which ops and/or tensors are instrumented by the debugger.
message DebuggedDevice {
  // Name of the device.
  string device_name = 1;

  // A debugger-generated ID for the device. Guaranteed to be unique within
  // the scope of the debugged TensorFlow program, including single-host and
  // multi-host settings.
  // TODO(cais): Test the uniqueness guarantee in multi-host settings.
  int32 device_id = 2;
}

// Data relating to the eager execution of an op or a Graph.
// For an op that generates N output tensors (N >= 0), only one
// Execution proto will be used to describe the execution event.
message Execution {
  // Op type (e.g., "MatMul").
  // In the case of a Graph, this is the name of the Graph.
  string op_type = 1;

  // Number of output tensors.
  int32 num_outputs = 2;

  // The graph that's executed: applicable only to the eager
  // execution of a FuncGraph.
  string graph_id = 3;

  // IDs of the input tensors (if available).
  repeated int64 input_tensor_ids = 4;

  // IDs of the output tensors (if available).
  // If specified, must have the same length as `tensor_protos`.
  repeated int64 output_tensor_ids = 5;

  // Type of the tensor value encapsulated in this proto.
  TensorDebugMode tensor_debug_mode = 6;

  // Output Tensor values in the type described by `tensor_debug_mode`.
  // The length of this should match `num_outputs`.
  repeated TensorProto tensor_protos = 7;

  // Stack trace of the eager execution.
  CodeLocation code_location = 8;

  // Debugger-generated IDs of the devices on which the output tensors reside.
  // To look up details about the device (e.g., name), cross-reference this
  // field with the DebuggedDevice messages.
  repeated int32 output_tensor_device_ids = 9;

  // TODO(cais): When backporting to V1 Session.run() support, add more fields
  // such as fetches and feeds.
}

// Data relating to an execution of a Graph (e.g., an eager execution of a
// FuncGraph).
// The values of the intermediate tensors computed in the graph are recorded
// in this proto. A graph execution may correspond to one or more pieces of
// `GraphExecutionTrace`, depending on whether the instrumented tensor values
// are summarized in an aggregated or separate fashion.
message GraphExecutionTrace {
  // Unique ID of the context that the executed op(s) belong to (e.g., a
  // compiled concrete tf.function).
  string tfdbg_context_id = 1;

  // Name of the op (applicable only in the case of the `FULL_TENSOR` trace
  // level).
  string op_name = 2;

  // Output slot of the tensor (applicable only in the case of the `FULL_TENSOR`
  // trace level).
  int32 output_slot = 3;

  // Type of the tensor value encapsulated in this proto.
  TensorDebugMode tensor_debug_mode = 4;

  // Tensor value in the type described by `tensor_debug_mode`.
  // This tensor may summarize the value of a single intermediate op of the
  // graph, or those of multiple intermediate tensors.
  TensorProto tensor_proto = 5;

  // Name of the device that the op belongs to.
  string device_name = 6;
}
