1syntax = "proto3";
2package tensorflow;
3
4import "tensorflow/core/framework/graph.proto";
5import "tensorflow/core/protobuf/config.proto";
6import "tensorflow/core/profiler/op_profile.proto";
7
// ProfilerService retrieves performance information, gathered over a period
// of time, about the programs running on connected devices.
service ProfilerService {
  // Starts a profiling session, blocks until that session completes, and
  // returns the collected data.
  rpc Profile(ProfileRequest) returns (ProfileResponse);

  // Collects profiling data and returns user-friendly metrics.
  rpc Monitor(MonitorRequest) returns (MonitorResponse);
}
18
// Profiling options; carried by ProfileRequest.opts.
message ProfileOptions {
  // Whether to include dataset ops in the profile. They are not collected by
  // default, for better trace-viewer scalability; the caller can manually set
  // this field to include them.
  bool include_dataset_ops = 1;

  // next-field: 2
}
26
// Per-tool request options; used as the value type of
// ProfileRequest.tool_options.
message ToolRequestOptions {
  // Output format required for the tool: one of "json", "proto", "raw", etc.
  // When not specified (backward compatible), the default format is used;
  // most tools default to the json format.
  string output_formats = 2;

  // Whether to save the result directly to the repository rather than pass it
  // back to the caller. Defaults to false for backward compatibility.
  bool save_to_repo = 3;

  // NOTE(review): field number 1 is not declared in this message; confirm
  // whether it was removed and should be marked `reserved`.
  // next-field: 4
}
37
// Request message for ProfilerService.Profile.
message ProfileRequest {
  // How much data to collect, in milliseconds. For now profiling simply
  // collects `duration_ms` milliseconds worth of data; in future the caller
  // will be able to customize when profiling starts and stops.
  uint64 duration_ms = 1;

  // Upper bound on the number of events to return. The default value, 0,
  // returns all events.
  uint64 max_events = 2;

  // Names of the required profiling tools, e.g. "input_pipeline_analyzer".
  repeated string tools = 3;

  // The requirements for each tool.
  // NOTE(review): map keys are presumably tool names as listed in `tools`;
  // confirm against the caller.
  map<string, ToolRequestOptions> tool_options = 8;

  // Optional profiling options that control how a TF session is profiled.
  ProfileOptions opts = 4;

  // Location where profile data is dumped. Normally
  // MODEL_DIR/plugin/profile/ is used as the repository root.
  string repository_root = 5;

  // User-provided identifier of the profile session.
  string session_id = 6;

  // Hostname of the system where the profile should happen. It is used as an
  // identifier in part of the output filename.
  string host_name = 7;

  // In future, the caller will indicate which TF session is being profiled,
  // and only data relating to that program will be returned. For now, all
  // activity during the profiling period is assumed to be relevant.

  // next-field: 9
}
72
// Output of a single profiling tool; returned in ProfileResponse.tool_data.
message ProfileToolData {
  // Name of the file this data is associated with, e.g.
  // "input_pipeline.json" or "cluster_xxx.memory_viewer.json".
  string name = 1;

  // Data payload for the specific tool (likely json).
  bytes data = 2;
}
81
// Response message for ProfilerService.Profile.
message ProfileResponse {
  // Field 1 was a uint64 placeholder for returning something meaningful.
  reserved 1;

  // Graphs of the programs executed on devices during the profiling period.
  repeated GraphDef computation_graph = 2;

  // Performance profile that can be used to annotate the HLO operations in
  // the computation graph.
  RunMetadata hlo_metadata = 5;

  // Encoded Trace proto message containing metadata about the trace captured
  // during the profiling period. Describes the devices and resources that
  // 'trace_events' refers to.
  // NOTE(review): 'trace_events' is not a field of this message; presumably
  // it lives in the encoded Trace proto. Confirm.
  bytes encoded_trace = 3;

  // Hierarchical performance profile assembled from the HLOs in trace events.
  // If the trace covers multiple programs, the longest-running one is
  // analyzed. See op_profile.proto for the detailed semantics of the
  // returned profile.
  profiler.op_profile.Profile op_profile = 4;

  // Data payload for each required tool.
  repeated ProfileToolData tool_data = 6;

  // Tells whether the captured trace is empty (due to an idle TPU). Needed
  // when profiling data is written directly to the repository directory.
  bool empty_trace = 7;

  // next-field: 8
}
110
// Request message for ProfilerService.Monitor.
message MonitorRequest {
  // Duration, in milliseconds, to profile for between each update.
  uint64 duration_ms = 1;

  // Level at which to monitor. Two levels are currently supported:
  //   Level 1: An ultra lightweight mode that captures only some utilization
  //            metrics.
  //   Level 2: More verbose than level 1. Collects utilization metrics,
  //            device information, step time information, etc. Do not use
  //            this level if the TPU host is being very heavily used.
  // NOTE(review): the meaning of 0 (the proto3 default when unset) is not
  // documented here; confirm how the server treats it.
  int32 monitoring_level = 2;

  // next-field: 3
}
126
// Response message for ProfilerService.Monitor.
message MonitorResponse {
  // Properly formatted string data that can be returned directly to the user.
  string data = 1;

  // next-field: 2
}
133