1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_ 17 #define TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_ 18 19 #include <string> 20 #include <vector> 21 22 #include "absl/container/flat_hash_map.h" 23 #include "absl/strings/string_view.h" 24 #include "tensorflow/core/platform/types.h" 25 #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" 26 #include "tensorflow/core/profiler/protobuf/steps_db.pb.h" 27 #include "tensorflow/core/profiler/utils/timespan.h" 28 29 namespace tensorflow { 30 namespace profiler { 31 32 // The various event types. Enumerations are numbered such that a bigger number 33 // has a higher priority than a smaller number when used in execution-time 34 // breakdown. 35 enum EventType { 36 // No event associated with the time. It could be that the machine was idle or 37 // executing some events which were not traced. 38 UNKNOWN_TIME = 0, 39 // Host is computing. 40 HOST_COMPUTE = 10, 41 // Host is preprocessing the data before the execution on device. 42 HOST_PREPROCESS = 20, 43 // Host is postprocessing the data after the execution on device. 44 HOST_POSTPROCESS = 30, 45 // Host is batching data (for inference). 46 HOST_BATCH_FORMATION = 40, 47 // Host runtime, like memory allocation and etc. 48 HOST_RUNTIME = 50, 49 // Host is compiling. 50 HOST_COMPILE = 60, 51 // Host-to-host communication. 52 HOST_TO_HOST = 70, 53 // Host-to-device communication. 54 HOST_TO_DEVICE = 80, 55 // Host is preparing to launch a computation on device. 56 HOST_PREPARE = 90, 57 // Assigns a smaller priority to DEVICE_COLLECTIVES than HOST_WAIT_INPUT, 58 // because if an all-reduce event is overlapped with an host-wait-input event, 59 // we want to count it as waiting for input. 60 // Collective Ops such as All-Reduce. 61 DEVICE_COLLECTIVES = 100, 62 // Host is waiting for input. 63 HOST_WAIT_INPUT = 110, 64 // Device-to-device communication. 65 DEVICE_TO_DEVICE = 120, 66 // Device-to-host communication. 67 DEVICE_TO_HOST = 130, 68 // Device is computing with 32-bit precision. 69 DEVICE_COMPUTE_32 = 140, 70 // Device is computing with 16-bit precision. 71 DEVICE_COMPUTE_16 = 150, 72 // Device is waiting for another device. 73 DEVICE_WAIT_DEVICE = 160, 74 // Device is waiting for host. 75 DEVICE_WAIT_HOST = 170, 76 LAST_EVENT_TYPE = DEVICE_WAIT_HOST 77 }; 78 79 // Generic event types that shown to the user. 80 enum GenericEventType { 81 kFirstGenericEventType = 1, 82 // Device is computing. 83 kDeviceCompute = kFirstGenericEventType, 84 // Device-to-device communication. 85 kDeviceToDevice, 86 // Collective Ops such as All-Reduce and NCCL. 87 kDeviceCollectives, 88 // Host is computing. 89 kHostCompute, 90 // Host is preparing to launch a computation on device. 91 kHostPrepare, 92 // Device waiting for input from the host. 93 kInput, 94 // Device sending output to the host. 95 kOutput, 96 // Host is compling. 97 kCompile, 98 // No recognized event associated with the time. 99 kAllOthers, 100 kLastGenericEventType = kAllOthers, 101 }; 102 103 // Contains the type and timespan of an event. 104 struct EventTypeSpan { 105 EventType type; // type of this event. 106 Timespan span; // timespan of this event. EventTypeSpanEventTypeSpan107 EventTypeSpan(EventType t, Timespan s) : type(t), span(s) {} 108 // Equality test. 109 bool operator==(const EventTypeSpan& other) const { 110 return type == other.type && span == other.span; 111 } 112 // Inequality test. 113 bool operator!=(const EventTypeSpan& other) const { 114 return !(*this == other); 115 } 116 }; 117 118 enum class StepMarkerType { 119 // "TraceContext" TraceMe events. 120 kExplicitHostStepMarker, 121 // Identified by group_events (e.g., FunctionRun, SessionRun). 122 kImplicitHostStepMarker, 123 // Derived from the result of group_events. A device step marker starts with 124 // the first device event of the group and ends with the last event of the 125 // group. 126 kDeviceStepMarker, 127 }; 128 129 // Record of an event that is used as a step marker. 130 struct StepMarker { 131 StepMarkerType type; 132 std::string event_name; // name of this event. 133 Timespan span; // timespan of this event. StepMarkerStepMarker134 StepMarker(StepMarkerType step_marker_type, absl::string_view name, 135 Timespan s) 136 : type(step_marker_type), event_name(name), span(s) {} 137 // Equality test. 138 bool operator==(const StepMarker& other) const { 139 return type == other.type && event_name == other.event_name && 140 span == other.span; 141 } 142 // Inequality test. 143 bool operator!=(const StepMarker& other) const { return !(*this == other); } 144 }; 145 146 // Details of a step. Note that this could be the result of combining the 147 // StepDetails of the same step executed on different cores. 148 class StepDetails { 149 public: StepDetails()150 StepDetails() : device_memory_transfers_(3) {} 151 Markers()152 const std::vector<StepMarker>& Markers() const { return markers_; } Events()153 const std::vector<EventTypeSpan>& Events() const { return events_; } Collectives()154 const absl::flat_hash_map<uint32, AllReduceDbResult>& Collectives() const { 155 return collectives_; 156 } DeviceMemoryTransfers()157 const std::vector<DeviceMemoryTransfer>& DeviceMemoryTransfers() const { 158 return device_memory_transfers_; 159 } 160 // Returns the step time. 161 Timespan StepTime() const; MutableMarkers()162 std::vector<StepMarker>* MutableMarkers() { return &markers_; } MutableEvents()163 std::vector<EventTypeSpan>* MutableEvents() { return &events_; } MutableCollectives()164 absl::flat_hash_map<uint32, AllReduceDbResult>* MutableCollectives() { 165 return &collectives_; 166 } MutableDeviceMemoryTransfers()167 std::vector<DeviceMemoryTransfer>* MutableDeviceMemoryTransfers() { 168 return &device_memory_transfers_; 169 } 170 // Adds a step-marker to this step. 171 void AddMarker(const StepMarker& m); 172 // Adds an EventTypeSpan to this step. 173 void AddEvent(const EventTypeSpan& e); 174 // Adds a collective op to this step. 175 void AddCollectiveOpEvent(uint64 core_id, const AllReduceInfo& e); 176 // Appends device memory transfer events to this step. 177 // Only event type of HOST_TO_DEVICE/DEVICE_TO_DEVICE/DEVICE_TO_HOST are 178 // allowed. 179 void AddDeviceMemoryTransferEvent(EventType event_type, 180 const Timespan& time_span, uint64 bytes); 181 // Appends the step-markers from another step to this step. 182 void AppendMarkers(const std::vector<StepMarker>& other_markers); 183 // Appends the events from another step to this step. 184 void AppendEvents(const std::vector<EventTypeSpan>& other_events); 185 // Appends the collectives from another step to this step. 186 void AppendCollectives( 187 const absl::flat_hash_map<uint32, AllReduceDbResult>& collectives); 188 // Accumulates the device memory transfers from another step to this step. 189 void AggregateDeviceMemoryTransfers( 190 const std::vector<DeviceMemoryTransfer> device_memory_transfers); 191 // Equality test. 192 bool operator==(const StepDetails& other) const; 193 // Inequality test. 194 bool operator!=(const StepDetails& other) const { return !(*this == other); } 195 // Returns a string that prints the content of this object. 196 std::string DebugString() const; 197 198 private: 199 // All step-markers found for marking this step in the traces. There could be 200 // multiple step-markers for a single step for different reasons. One such 201 // reason is that there may be one step-marker for the same step on each core; 202 // so after combining the StepDetails from multiple cores, there would be 203 // multiple step-markers for the same step. 204 std::vector<StepMarker> markers_; 205 // All events belonging to this step. 206 std::vector<EventTypeSpan> events_; 207 // Collective operation related events such as all-reduce etc. 208 absl::flat_hash_map<uint32, AllReduceDbResult> collectives_; 209 // Device memory transfers (including time and bytes involved). 210 // TODO(jiesun): Consider to use IntervalSet instead of just sum up the event 211 // durations. 212 std::vector<DeviceMemoryTransfer> device_memory_transfers_; 213 }; 214 215 // Map from step_id to the events happened in that step. 216 using StepEvents = absl::flat_hash_map<int64 /*step_id*/, StepDetails>; 217 218 // Equality test for StepEvents. 219 bool operator==(const StepEvents& a, const StepEvents& b); 220 221 // Returns the event type of the given CPU event. 222 EventType ClassifyCpuEvent(absl::string_view event_name, int64 correlation_id, 223 bool has_device); 224 225 // Returns the event type of the given GPU event and tensor shapes. 226 EventType ClassifyGpuEvent(absl::string_view event_name, 227 absl::string_view tensor_shapes); 228 229 // Returns the name of the given EventType. 230 std::string PrintEventType(EventType event_type); 231 232 // Returns the string of the given GenericEventType. 233 absl::string_view GetGenericEventTypeStr(GenericEventType event_type); 234 235 // Returns a string that prints the given EventTypeSpan. 236 std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span); 237 238 // Returns a string that prints the given StepMarker. 239 std::string PrintStepMarker(const StepMarker& step_marker); 240 241 // Returns a string that prints the given StepEvents. 242 std::string PrintStepEvents(const StepEvents& step_events); 243 244 // Combines the src StepEvents into dst. 245 void CombineStepEvents(const StepEvents& src, StepEvents* dst); 246 247 // Converts from overlapped events to non-overlapped events. 248 std::vector<EventTypeSpan> ToNonOverlappedEvents( 249 const std::vector<EventTypeSpan>& overlapped_events); 250 251 // Converts from overlapped step-events to non-overlapped step events. 252 StepEvents ToNonOverlappedStepEvents(const StepEvents& overlapped_step_events); 253 254 // Returns the precision stats of the given non-overlapped step events. 255 PrecisionStats ComputePrecisionStats( 256 const StepEvents& nonoverlapped_step_events); 257 258 } // namespace profiler 259 } // namespace tensorflow 260 261 #endif // TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_ 262