1syntax = "proto3";
2
3package tensorflow.tfprof;
4
5import "tensorflow/core/framework/attr_value.proto";
6import "tensorflow/core/framework/step_stats.proto";
7
// Describes the Python call stack that created an op.
message CodeDef {
  // One entry of the call stack.
  //
  // The string fields `file`, `function` and `line` are deprecated; each one
  // is superseded by the matching integer `*_id` field. The `id_to_string`
  // maps in this file are documented as mapping those ids back to strings.
  message Trace {
    // Deprecated: superseded by file_id.
    string file = 1 [deprecated = true];
    // Id that replaces the deprecated `file` string.
    int64 file_id = 6;

    // NOTE(review): presumably the line number of this entry — confirm.
    int32 lineno = 2;

    // Deprecated: superseded by function_id.
    string function = 3 [deprecated = true];
    // Id that replaces the deprecated `function` string.
    int64 function_id = 7;

    // Deprecated: superseded by line_id.
    string line = 4 [deprecated = true];
    // Id that replaces the deprecated `line` string.
    int64 line_id = 8;

    // NOTE(review): presumably the line on which the enclosing function
    // starts — confirm.
    int32 func_start_line = 5;
  }

  // The entries that make up the call stack.
  repeated Trace traces = 1;
}
26
// Extra logged information about a single op.
message OpLogEntry {
  // Name of the op.
  string name = 1;

  // Populated by the tfprof Python API when it is called. The op must have
  // RegisterStatistics defined; currently Conv2D, MatMul, etc. are
  // implemented.
  int64 float_ops = 2;

  // Additional, user-defined op type information for this op. It lets the
  // user select a group of ops precisely by using op_type as a key.
  repeated string types = 3;

  // Backs the tfprof "code" view.
  CodeDef code_def = 4;
}
40
// A collection of OpLogEntry records plus the string table they share.
message OpLogProto {
  // The logged entries.
  repeated OpLogEntry log_entries = 1;

  // Lookup table from the ids used by CodeDef (file, function, line) to the
  // strings they stand for. Ids of other fields may be mapped here as well in
  // the future.
  map<int64, string> id_to_string = 2;
}
48
// Proto form of the profiler's profile. It exists so that profiles can be
// serialized, shipped around and deserialized.
//
// Please do not depend on the internals of this proto.
message ProfileProto {
  // NOTE(review): presumably keyed by ProfileNode.id — confirm.
  map<int64, ProfileNode> nodes = 1;

  // True when code traces are present.
  bool has_trace = 2;

  // True when the TF device tracer failed to return accelerator information,
  // which could lead to an accelerator execution time of 0.
  bool miss_accelerator_stream = 5;

  // The steps that were traced.
  repeated int64 steps = 3;

  // Lookup table from the ids used by CodeDef (file, function, line) to the
  // strings they stand for. Ids of other fields may be mapped here as well in
  // the future.
  map<int64, string> id_to_string = 4;
}
67
// Profile information attached to one graph node.
message ProfileNode {
  // Name of the graph node.
  string name = 1;
  // Type of the graph operation.
  string op = 9;
  // Unique id of this node.
  int64 id = 13;

  // NOTE(review): key/value meaning of the next four maps is not stated in
  // this file; the int32 keys are presumably input/output indices — confirm.

  map<int32, int64> inputs = 2;
  map<int32, Tuple> input_shapes = 16;
  map<int32, int64> outputs = 3;
  map<int32, Tuple> output_shapes = 15;

  // Keyed by source node id; the value is that source node's output index
  // that feeds the current node.
  map<int64, int32> src_output_index = 14;

  // NOTE(review): presumably the shape of this node's output — confirm.
  repeated int64 shape = 4;
  // NOTE(review): presumably mirrors OpLogEntry.types — confirm.
  repeated string op_types = 5;
  string canonical_device = 6;
  string host_device = 7;

  // NOTE(review): presumably mirrors OpLogEntry.float_ops — confirm.
  int64 float_ops = 8;

  // Call stack information recorded for this node (see CodeDef).
  CodeDef trace = 10;
  // NOTE(review): presumably the node's attributes by name — confirm.
  map<string, AttrValue> attrs = 11;

  // NOTE(review): presumably keyed by step (see ProfileProto.steps) — confirm.
  map<int64, ExecProfile> execs = 12;
}
95
// Execution profile of a node (referenced from ProfileNode.execs).
//
// NOTE(review): field numbers 8-10 are not used by this message; check the
// file history before assigning them to new fields.
message ExecProfile {
  // May exceed 1 when the node runs multiple times in a loop.
  int64 run_count = 1;

  // Earliest and latest times, covering both scheduling and execution.
  int64 all_start_micros = 2;
  int64 latest_end_micros = 3;

  // The two maps below go from a device to a vector of
  // {op_start_micros, op_exec_micros} pairs.

  // accelerator_execs: gpu:id/stream:all -> {op_start_micros, op_exec_micros}
  // The vector can hold more than one pair when multiple kernels fire or
  // when the op is inside a tf.while_loop.
  map<string, ExecTime> accelerator_execs = 4;

  // cpu_execs: cpu/gpu:id -> {op_start_micros, op_exec_micros}
  // The vector can hold more than one pair when the op is inside a
  // tf.while_loop.
  map<string, ExecTime> cpu_execs = 5;

  // One entry per scheduling of the node, holding its memory information.
  // Inside a while_loop there are normally multiple entries.
  repeated ExecMemory memory_execs = 7;

  // Allocation and deallocation times and sizes throughout execution.
  repeated AllocationRecord allocations = 11;

  // Devices related to this execution.
  repeated string devices = 6;
}
120
// A vector of timing tuples, used as the value type of the per-device maps in
// ExecProfile.
message ExecTime {
  // NOTE(review): per the ExecProfile comments each tuple is presumably an
  // {op_start_micros, op_exec_micros} pair — confirm.
  repeated Tuple times = 1;
}
124
// Memory information recorded for one scheduling of a node (see
// ExecProfile.memory_execs).
message ExecMemory {
  // Timestamp at which this memory information was tracked.
  int64 memory_micros = 1;

  // NOTE: Please do not depend on the following 4 fields yet. Because of
  // TensorFlow internal tracing issues, the numbers can be quite wrong.
  // TODO(xpan): Fix the TensorFlow internal tracing.

  int64 host_temp_bytes = 2;
  int64 host_persistent_bytes = 3;
  int64 accelerator_temp_bytes = 4;
  int64 accelerator_persistent_bytes = 5;

  // Total bytes requested by the op.
  int64 requested_bytes = 6;

  // Total bytes requested by the op and released before op end.
  // NOTE(review): the field name suggests a peak value; confirm that this
  // description is the intended meaning.
  int64 peak_bytes = 7;

  // Total bytes requested by the op and not released after op end.
  int64 residual_bytes = 8;

  // Total bytes output by the op (not necessarily requested by the op).
  int64 output_bytes = 9;

  // If > 0, the total number of bytes currently allocated by the allocator.
  int64 allocator_bytes_in_use = 10;

  // Memory of each output of the operation.
  // NOTE(review): the int32 key is presumably the output index — confirm.
  map<int32, Memory> output_memory = 11;
}
149
// A list of int64 values. Used in this file for shapes
// (ProfileNode.input_shapes / output_shapes) and timings (ExecTime.times).
message Tuple {
  repeated int64 int64_values = 1;
}
153
// Memory of a single op output (value type of ExecMemory.output_memory).
message Memory {
  // NOTE(review): presumably the size of the output in bytes — confirm.
  int64 bytes = 1;
  // NOTE(review): presumably the address of the output buffer — confirm.
  uint64 ptr = 2;
}
158