/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_
#define TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_

#include <string>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
#include "tensorflow/core/profiler/utils/timespan.h"

namespace tensorflow {
namespace profiler {

// The various event types. Enumerations are numbered such that a bigger number
// has a higher priority than a smaller number when used in execution-time
// breakdown.
enum EventType {
  // No event associated with the time. It could be that the machine was idle
  // or executing some events that were not traced.
  UNKNOWN_TIME = 0,
  // Host is computing.
  HOST_COMPUTE = 10,
  // Host is preprocessing the data before execution on the device.
  HOST_PREPROCESS = 20,
  // Host is postprocessing the data after execution on the device.
  HOST_POSTPROCESS = 30,
  // Host is batching data (for inference).
  HOST_BATCH_FORMATION = 40,
  // Host runtime, e.g., memory allocation.
  HOST_RUNTIME = 50,
  // Host is compiling.
  HOST_COMPILE = 60,
  // Host-to-host communication.
  HOST_TO_HOST = 70,
  // Host-to-device communication.
  HOST_TO_DEVICE = 80,
  // Host is preparing to launch a computation on device.
  HOST_PREPARE = 90,
  // Assigns a smaller priority to DEVICE_COLLECTIVES than HOST_WAIT_INPUT,
  // because if an all-reduce event overlaps with a host-wait-input event, we
  // want to count the overlap as waiting for input.
  // Collective Ops such as All-Reduce.
  DEVICE_COLLECTIVES = 100,
  // Host is waiting for input.
  HOST_WAIT_INPUT = 110,
  // Device-to-device communication.
  DEVICE_TO_DEVICE = 120,
  // Device-to-host communication.
  DEVICE_TO_HOST = 130,
  // Device is computing with 32-bit precision.
  DEVICE_COMPUTE_32 = 140,
  // Device is computing with 16-bit precision.
  DEVICE_COMPUTE_16 = 150,
  // Device is waiting for another device.
  DEVICE_WAIT_DEVICE = 160,
  // Device is waiting for host.
  DEVICE_WAIT_HOST = 170,
  LAST_EVENT_TYPE = DEVICE_WAIT_HOST
};

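// Illustrative example of the priority ordering (not part of the API): if a
// HOST_COMPUTE event overlaps a DEVICE_COMPUTE_32 event, the overlapping time
// is attributed to DEVICE_COMPUTE_32 in the execution-time breakdown, because
// its enum value (140) is larger than HOST_COMPUTE's (10).
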
// Generic event types that are shown to the user.
enum GenericEventType {
  kFirstGenericEventType = 1,
  // Device is computing.
  kDeviceCompute = kFirstGenericEventType,
  // Device-to-device communication.
  kDeviceToDevice,
  // Collective Ops such as All-Reduce and NCCL.
  kDeviceCollectives,
  // Host is computing.
  kHostCompute,
  // Host is preparing to launch a computation on device.
  kHostPrepare,
  // Device waiting for input from the host.
  kInput,
  // Device sending output to the host.
  kOutput,
  // Host is compiling.
  kCompile,
  // No recognized event associated with the time.
  kAllOthers,
  kLastGenericEventType = kAllOthers,
};

// Contains the type and timespan of an event.
struct EventTypeSpan {
  EventType type;  // type of this event.
  Timespan span;   // timespan of this event.
  EventTypeSpan(EventType t, Timespan s) : type(t), span(s) {}
  // Equality test.
  bool operator==(const EventTypeSpan& other) const {
    return type == other.type && span == other.span;
  }
  // Inequality test.
  bool operator!=(const EventTypeSpan& other) const {
    return !(*this == other);
  }
};

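// Example (illustrative sketch; assumes the Timespan(begin_ps, duration_ps)
// constructor declared in timespan.h):
//   EventTypeSpan compute_span(DEVICE_COMPUTE_32,
//                              Timespan(/*begin_ps=*/100, /*duration_ps=*/50));
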
enum class StepMarkerType {
  // "TraceContext" TraceMe events.
  kExplicitHostStepMarker,
  // Identified by group_events (e.g., FunctionRun, SessionRun).
  kImplicitHostStepMarker,
  // Derived from the result of group_events. A device step marker starts with
  // the first device event of the group and ends with the last event of the
  // group.
  kDeviceStepMarker,
};

// Record of an event that is used as a step marker.
struct StepMarker {
  StepMarkerType type;
  std::string event_name;  // name of this event.
  Timespan span;           // timespan of this event.
  StepMarker(StepMarkerType step_marker_type, absl::string_view name,
             Timespan s)
      : type(step_marker_type), event_name(name), span(s) {}
  // Equality test.
  bool operator==(const StepMarker& other) const {
    return type == other.type && event_name == other.event_name &&
           span == other.span;
  }
  // Inequality test.
  bool operator!=(const StepMarker& other) const { return !(*this == other); }
};

// Details of a step. Note that this could be the result of combining the
// StepDetails of the same step executed on different cores.
class StepDetails {
 public:
  StepDetails() : device_memory_transfers_(3) {}

  const std::vector<StepMarker>& Markers() const { return markers_; }
  const std::vector<EventTypeSpan>& Events() const { return events_; }
  const absl::flat_hash_map<uint32, AllReduceDbResult>& Collectives() const {
    return collectives_;
  }
  const std::vector<DeviceMemoryTransfer>& DeviceMemoryTransfers() const {
    return device_memory_transfers_;
  }
  // Returns the step time.
  Timespan StepTime() const;
  std::vector<StepMarker>* MutableMarkers() { return &markers_; }
  std::vector<EventTypeSpan>* MutableEvents() { return &events_; }
  absl::flat_hash_map<uint32, AllReduceDbResult>* MutableCollectives() {
    return &collectives_;
  }
  std::vector<DeviceMemoryTransfer>* MutableDeviceMemoryTransfers() {
    return &device_memory_transfers_;
  }
  // Adds a step-marker to this step.
  void AddMarker(const StepMarker& m);
  // Adds an EventTypeSpan to this step.
  void AddEvent(const EventTypeSpan& e);
  // Adds a collective op to this step.
  void AddCollectiveOpEvent(uint64 core_id, const AllReduceInfo& e);
  // Appends a device memory transfer event to this step.
  // Only the event types HOST_TO_DEVICE, DEVICE_TO_DEVICE, and DEVICE_TO_HOST
  // are allowed.
  void AddDeviceMemoryTransferEvent(EventType event_type,
                                    const Timespan& time_span, uint64 bytes);
  // Appends the step-markers from another step to this step.
  void AppendMarkers(const std::vector<StepMarker>& other_markers);
  // Appends the events from another step to this step.
  void AppendEvents(const std::vector<EventTypeSpan>& other_events);
  // Appends the collectives from another step to this step.
  void AppendCollectives(
      const absl::flat_hash_map<uint32, AllReduceDbResult>& collectives);
  // Accumulates the device memory transfers from another step into this step.
  void AggregateDeviceMemoryTransfers(
      const std::vector<DeviceMemoryTransfer> device_memory_transfers);
  // Equality test.
  bool operator==(const StepDetails& other) const;
  // Inequality test.
  bool operator!=(const StepDetails& other) const { return !(*this == other); }
  // Returns a string that prints the content of this object.
  std::string DebugString() const;

 private:
  // All step-markers found for marking this step in the traces. There could be
  // multiple step-markers for a single step for different reasons. One such
  // reason is that there may be one step-marker for the same step on each
  // core; so after combining the StepDetails from multiple cores, there would
  // be multiple step-markers for the same step.
  std::vector<StepMarker> markers_;
  // All events belonging to this step.
  std::vector<EventTypeSpan> events_;
  // Collective-operation-related events, such as all-reduce.
  absl::flat_hash_map<uint32, AllReduceDbResult> collectives_;
  // Device memory transfers (including time and bytes involved).
  // TODO(jiesun): Consider using an IntervalSet instead of just summing up the
  // event durations.
  std::vector<DeviceMemoryTransfer> device_memory_transfers_;
};

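// Example usage (illustrative sketch; the marker name and timespans are made
// up, and the Timespan(begin_ps, duration_ps) constructor from timespan.h is
// assumed):
//   StepDetails details;
//   details.AddMarker(StepMarker(StepMarkerType::kImplicitHostStepMarker,
//                                "SessionRun", Timespan(0, 1000)));
//   details.AddEvent(EventTypeSpan(HOST_COMPUTE, Timespan(100, 200)));
//   Timespan step_time = details.StepTime();
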
// Map from step_id to the events that happened in that step.
using StepEvents = absl::flat_hash_map<int64 /*step_id*/, StepDetails>;

// Equality test for StepEvents.
bool operator==(const StepEvents& a, const StepEvents& b);

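// Example of iterating over a StepEvents map (illustrative sketch):
//   for (const auto& id_and_details : step_events) {
//     int64 step_id = id_and_details.first;
//     const StepDetails& details = id_and_details.second;
//     // e.g., inspect details.StepTime() or details.Events().
//   }
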
// Returns the event type of the given CPU event.
EventType ClassifyCpuEvent(absl::string_view event_name, int64 correlation_id,
                           bool has_device);

// Returns the event type of the given GPU event and tensor shapes.
EventType ClassifyGpuEvent(absl::string_view event_name,
                           absl::string_view tensor_shapes);

// Returns the name of the given EventType.
std::string PrintEventType(EventType event_type);

// Returns the string of the given GenericEventType.
absl::string_view GetGenericEventTypeStr(GenericEventType event_type);

// Returns a string that prints the given EventTypeSpan.
std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span);

// Returns a string that prints the given StepMarker.
std::string PrintStepMarker(const StepMarker& step_marker);

// Returns a string that prints the given StepEvents.
std::string PrintStepEvents(const StepEvents& step_events);

// Combines the src StepEvents into dst.
void CombineStepEvents(const StepEvents& src, StepEvents* dst);

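// Typical usage (illustrative sketch): merge StepEvents collected from several
// sources into a single map.
//   StepEvents combined;
//   CombineStepEvents(step_events_from_host, &combined);
//   CombineStepEvents(step_events_from_device, &combined);
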
// Converts from overlapped events to non-overlapped events.
std::vector<EventTypeSpan> ToNonOverlappedEvents(
    const std::vector<EventTypeSpan>& overlapped_events);

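// Illustrative sketch of the conversion (assuming the overlapped portion is
// attributed to the event type with the higher EventType value):
//   Overlapped input:   HOST_COMPUTE over [0, 100), DEVICE_COMPUTE_32 over
//                       [40, 60)
//   Non-overlapped out: HOST_COMPUTE over [0, 40), DEVICE_COMPUTE_32 over
//                       [40, 60), HOST_COMPUTE over [60, 100)
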
// Converts from overlapped step events to non-overlapped step events.
StepEvents ToNonOverlappedStepEvents(const StepEvents& overlapped_step_events);

// Returns the precision stats of the given non-overlapped step events.
PrecisionStats ComputePrecisionStats(
    const StepEvents& nonoverlapped_step_events);

}  // namespace profiler
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_