syntax = "proto3";

package tensorflow.tfprof;

import "tensorflow/core/framework/attr_value.proto";
import "tensorflow/core/framework/step_stats.proto";

option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/profiler/tfprof_log_go_proto";

// It specifies the Python callstack that creates an op.
message CodeDef {
  // Entries of the call stack.
  repeated Trace traces = 1;

  // One entry of the call stack.
  //
  // The string fields are deprecated in favor of integer ids. The
  // id_to_string maps on OpLogProto and ProfileProto map the ids of the file,
  // function and line fields back to their strings.
  message Trace {
    string file = 1 [deprecated = true];  // deprecated by file_id.
    // Id of the file string.
    int64 file_id = 6;

    // NOTE(review): presumably the line number within the file; confirm.
    int32 lineno = 2;

    string function = 3 [deprecated = true];  // deprecated by function_id.
    // Id of the function string.
    int64 function_id = 7;

    string line = 4 [deprecated = true];  // deprecated by line_id.
    // Id of the line string.
    int64 line_id = 8;

    // NOTE(review): presumably the line on which the enclosing function
    // starts; confirm.
    int32 func_start_line = 5;
  }
}

// Extra, per-op information identified by the op name.
message OpLogEntry {
  // op name.
  string name = 1;
  // float_ops is filled by tfprof Python API when called. It requires the
  // op has RegisterStatistics defined. Currently, Conv2D, MatMul, etc, are
  // implemented.
  int64 float_ops = 2;
  // User can define extra op type information for an op. This allows the user
  // to select a group of ops precisely using op_type as a key.
  repeated string types = 3;
  // Used to support tfprof "code" view.
  CodeDef code_def = 4;
}

// A list of OpLogEntry messages together with the id-to-string table used by
// the CodeDef traces.
message OpLogProto {
  // One entry per op.
  repeated OpLogEntry log_entries = 1;

  // Maps from the id of a CodeDef file, function or line to its string.
  // In the future it can also map ids of other fields to strings.
  map<int64, string> id_to_string = 2;
}

// A proto representation of the profiler's profile.
// It allows serialization, shipping around and deserialization of the profiles.
//
// Please don't depend on the internals of the profile proto.
message ProfileProto {
  // The profiled nodes.
  // NOTE(review): the int64 key is presumably ProfileNode.id; confirm.
  map<int64, ProfileNode> nodes = 1;
  // Whether or not has code traces.
  bool has_trace = 2;
  // Whether or not the TF device tracer fails to return accelerator
  // information (which could lead to 0 accelerator execution time).
  bool miss_accelerator_stream = 5;
  // Traced steps.
  repeated int64 steps = 3;

  // Maps from the id of a CodeDef file, function or line to its string.
  // In the future it can also map ids of other fields to strings.
  map<int64, string> id_to_string = 4;
}

// Profile information of one graph node.
message ProfileNode {
  // graph node name.
  string name = 1;
  // graph operation type.
  string op = 9;
  // A unique id for the node.
  int64 id = 13;

  // NOTE(review): inputs/outputs are presumably keyed by input/output index,
  // with a node id as the value; confirm against the producer of this proto.
  // The *_shapes maps use the same int32 key type and store each shape as a
  // Tuple of int64 values.
  map<int32, int64> inputs = 2;
  map<int32, Tuple> input_shapes = 16;
  map<int32, int64> outputs = 3;
  map<int32, Tuple> output_shapes = 15;
  // A map from source node id to its output index to current node.
  map<int64, int32> src_output_index = 14;

  // NOTE(review): presumably the dimensions of the node's shape; confirm.
  repeated int64 shape = 4;
  // NOTE(review): presumably the op types attached to the node (compare
  // OpLogEntry.types); confirm.
  repeated string op_types = 5;
  string canonical_device = 6;
  string host_device = 7;

  // NOTE(review): presumably the same quantity as OpLogEntry.float_ops;
  // confirm.
  int64 float_ops = 8;

  // Call stack recorded for the node (see CodeDef).
  CodeDef trace = 10;
  // Attributes keyed by string. AttrValue is not declared in this file; it is
  // expected to come from the imported attr_value.proto.
  map<string, AttrValue> attrs = 11;

  // Execution profiles.
  // NOTE(review): the int64 key is presumably the traced step (see
  // ProfileProto.steps); confirm.
  map<int64, ExecProfile> execs = 12;
}

// Execution profile of a node: run count, timing per device, memory and
// allocation records.
//
// NOTE(review): field numbers 8-10 are not declared in this message. If an
// earlier revision used them they should be marked `reserved`; confirm against
// the file history before assigning them to new fields.
message ExecProfile {
  // Can be larger than 1 if run multiple times in loop.
  int64 run_count = 1;
  // The earliest start time, including scheduling and execution.
  int64 all_start_micros = 2;
  // The latest end time, including scheduling and execution.
  int64 latest_end_micros = 3;

  // device -> vector of {op_start_micros, op_exec_micros} pairs.
  // accelerator_execs: gpu:id/stream:all -> {op_start_micros, op_exec_micros}
  // For accelerator, vector size can be larger than 1, multiple kernel fires
  // or in tf.while_loop.
  map<string, ExecTime> accelerator_execs = 4;
  // cpu_execs: cpu/gpu:id -> {op_start_micros, op_exec_micros}
  // For cpu, vector size can be larger than 1 if in tf.while_loop.
  map<string, ExecTime> cpu_execs = 5;

  // Each entry holds the memory information of one scheduling of the node.
  // Normally, there will be multiple entries in while_loop.
  repeated ExecMemory memory_execs = 7;
  // The allocation and deallocation times and sizes throughout execution.
  // AllocationRecord is not declared in this file; it is expected to come
  // from the imported step_stats.proto.
  repeated AllocationRecord allocations = 11;
  // The devices related to this execution.
  repeated string devices = 6;
}

// Execution times recorded for one device key of ExecProfile.accelerator_execs
// or ExecProfile.cpu_execs.
message ExecTime {
  // Each Tuple is an {op_start_micros, op_exec_micros} pair.
  repeated Tuple times = 1;
}

// Memory information of one scheduling of a node (see
// ExecProfile.memory_execs).
message ExecMemory {
  // This is the timestamp when the memory information was tracked.
  int64 memory_micros = 1;
  // NOTE: Please don't depend on the following 4 fields yet
  // (host_temp_bytes, host_persistent_bytes, accelerator_temp_bytes,
  // accelerator_persistent_bytes). Due to TensorFlow internal tracing issues,
  // the numbers can be quite wrong.
  // TODO(xpan): Fix the TensorFlow internal tracing.
  int64 host_temp_bytes = 2;
  int64 host_persistent_bytes = 3;
  int64 accelerator_temp_bytes = 4;
  int64 accelerator_persistent_bytes = 5;

  // Total bytes requested by the op.
  int64 requested_bytes = 6;
  // Total bytes requested by the op and released before op end.
  int64 peak_bytes = 7;
  // Total bytes requested by the op and not released after op end.
  int64 residual_bytes = 8;
  // Total bytes output by the op (not necessarily requested by the op).
  int64 output_bytes = 9;
  // The total number of bytes currently allocated by the allocator if >0.
  int64 allocator_bytes_in_use = 10;
  // The memory of each output of the operation.
  // NOTE(review): the int32 key is presumably the output index; confirm.
  map<int32, Memory> output_memory = 11;
}

// A generic list of int64 values. In this file it carries the shapes in
// ProfileNode.input_shapes / ProfileNode.output_shapes and the
// {op_start_micros, op_exec_micros} pairs in ExecTime.times.
message Tuple {
  repeated int64 int64_values = 1;
}

// Memory of one output of an operation (see ExecMemory.output_memory).
message Memory {
  // Size in bytes.
  int64 bytes = 1;
  // NOTE(review): presumably the address of the memory; confirm.
  uint64 ptr = 2;
}
159