/* Copyright 2016 The TensorFlow Authors All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_PROFILER_INTERNAL_TFPROF_TIMELINE_H_
#define TENSORFLOW_CORE_PROFILER_INTERNAL_TFPROF_TIMELINE_H_

#include "include/json/json.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/step_stats.pb.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/profiler/internal/tfprof_node_show.h"
#include "tensorflow/core/protobuf/config.pb.h"
namespace tensorflow {
namespace tfprof {

typedef std::map<string, string> Event;

// Class for generating timeline json output.
class ChromeTraceFormatter {
 public:
  ChromeTraceFormatter() {}
  // The following methods create timeline nodes. See the chrome tracing
  // format document for details.
  Json::Value CreateEvent(const string& ph, const string& category,
                          const string& name, int64 pid, int64 tid, int64 ts);

  void EmitPID(const string& name, int64 pid);

  void EmitRegion(int64 ts, int64 duration, int64 pid, int64 tid,
                  const string& category, const string& name, Json::Value args);

  void EmitFlowStart(const string& name, int64 ts, int64 pid, int64 tid,
                     int64 flow_id);

  void EmitFlowEnd(const string& name, int64 ts, int64 pid, int64 tid,
                   int64 flow_id);

  void EmitCounter(const string& category, const string& name, int64 pid,
                   int64 ts, const string& device, int64 bytes,
                   const std::map<int64, std::vector<string>>& tensor_mem);

  string Format();

 private:
  // An event is a visualization unit in the timeline.
  std::vector<Json::Value> events_;
  std::vector<Json::Value> metadata_;
};
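
// A minimal usage sketch (hypothetical call sequence; the real call sites are
// in the timeline generation code below and in tfprof_timeline.cc):
//
//   ChromeTraceFormatter formatter;
//   formatter.EmitPID("Scope:0", 0);
//   Json::Value args(Json::objectValue);
//   args["name"] = Json::Value("MatMul");
//   formatter.EmitRegion(/*ts=*/100, /*duration=*/50, /*pid=*/0, /*tid=*/0,
//                        "Op", "MatMul", args);
//   string chrome_trace_json = formatter.Format();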

// A process (time series of events) in the timeline.
class Process {
 public:
  Process(const string& device, int64 pid) : device(device), pid(pid) {}

  // Each lane is a map from start_time to end_time.
  std::vector<std::map<int64, int64>> lanes;
  // Device for the time series.
  string device;
  // Unique id for the time series.
  int64 pid;
};
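
// For example (a hypothetical sketch), two overlapping kernels on a device
// cannot share a lane, so they end up on separate lanes:
//   lanes[0] = {{100, 150}};  // kernel A runs from 100us to 150us
//   lanes[1] = {{120, 180}};  // kernel B overlaps A, so it goes to lane 1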

class TimeNode {
 public:
  TimeNode(Process* process, GraphNode* node, int64 start_micros,
           int64 exec_micros)
      : process(process),
        node(node),
        start_micros(start_micros),
        exec_micros(exec_micros),
        tid(-1) {}
  virtual ~TimeNode() {}

  const string& name() { return node->name(); }

  Process* process;
  GraphNode* node;
  int64 start_micros;
  int64 exec_micros;
  int64 tid;
  std::vector<TimeNode*> next_tnodes;
};

// Tracks memory based on the op input/output, temporary bytes and
// persistent bytes.
// Currently, we calculate a "predicted" memory, but do not use it for display.
// The displayed memory timeline comes directly from the TensorFlow allocator,
// which is the ground truth.
class MemoryTracker {
 public:
  class Device {
   public:
    // Map from tensor name to a map of <alloc time, bytes_in_use>.
    std::map<string, std::map<int64, int64>> tensor_allocs;
    // Ground truth memory stats. time -> bytes.
    std::map<int64, int64> allocations;
    // Tracked allocations, might miss some bytes.
    std::map<int64, int64> tracked_allocations;
  };

  void TrackNode(int64 step, const GraphNode* node);

  const std::map<string, Device>& devices() const { return devices_; }

 private:
  std::map<string, Device> devices_;
};
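
// A hypothetical sketch of how the tracker is fed and read (the real call
// site is Timeline::TrackNode below):
//
//   MemoryTracker tracker;
//   tracker.TrackNode(/*step=*/0, graph_node);
//   for (const auto& dev : tracker.devices()) {
//     // dev.second.allocations maps timestamp -> allocator bytes in use
//     // on device dev.first.
//   }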

class Timeline {
 public:
  Timeline(int64 step, const string& outfile)
      : step_(step), outfile_(outfile) {}
  ~Timeline() {}

  int64 step() const { return step_; }
  void SetStep(int64 step) { step_ = step; }

  void GenerateGraphTimeline(const std::vector<GraphNode*>& gnodes);

  void GenerateScopeTimeline(const ScopeNode* node);

  void GenerateCodeTimeline(const CodeNode* node);

 private:
  void TrackNode(const GraphNode* node) { mem_tracker_.TrackNode(step_, node); }

  void OutputTimeline();

  template <typename Node>
  void EmitTreeNode(const Node* node, int64 start_time, int64 duration,
                    int64 depth, std::set<int64>* visited_depth) {
    if (visited_depth->find(depth) == visited_depth->end()) {
      chrome_formatter_.EmitPID(strings::StrCat("Scope:", depth), depth);
      visited_depth->insert(depth);
    }

    Json::Value args(Json::objectValue);
    args["name"] = Json::Value(node->name());
    args["op"] = Json::Value(node->name());
    chrome_formatter_.EmitRegion(start_time, duration, depth, 0, "Op",
                                 node->name(), args);

    int64 total_micros = 0;
    int64 c_start_time = start_time;
    for (const Node* child : node->show_children) {
      int64 total_exec_micros = child->proto().total_exec_micros();
      if (total_exec_micros <= 0) {
        continue;
      }
      EmitTreeNode(child, c_start_time, total_exec_micros, depth + 1,
                   visited_depth);
      c_start_time += total_exec_micros;
      total_micros += total_exec_micros;
    }
    CHECK(total_micros <= duration) << node->name() << " parent:" << duration
                                    << " children:" << total_micros;
  }

  void AllocateTimeNodes(GraphNode* gnode);

  void AllocateLanes();

  int64 AllocatePID();

  int64 step_;
  const string outfile_;
  int64 next_pid_ = 0;
  MemoryTracker mem_tracker_;
  ChromeTraceFormatter chrome_formatter_;
  std::map<string, int64> device_pids_;

  std::map<string, std::unique_ptr<Process>> process_;
  std::map<int64, std::map<int64, std::map<int64, TimeNode*>>> alloc_nodes_;
  std::map<string, std::map<int64, std::unique_ptr<TimeNode>>> tnodes_;
};
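
// A minimal usage sketch (hypothetical driver code; the real call sites are
// the tfprof show commands, and the generated chrome trace json is presumably
// written to outfile via the private OutputTimeline()):
//
//   Timeline timeline(/*step=*/0, "/tmp/timeline.json");
//   timeline.GenerateGraphTimeline(graph_nodes);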

}  // namespace tfprof
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_PROFILER_INTERNAL_TFPROF_TIMELINE_H_