1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
#include "tensorflow/core/profiler/convert/xplane_to_step_events.h"

#include "absl/container/flat_hash_map.h"
#include "absl/strings/match.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/event_span.h"
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/timespan.h"
#include "tensorflow/core/profiler/utils/trace_utils.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_visitor.h"
32 
33 namespace tensorflow {
34 namespace profiler {
35 namespace {
36 
IsExplicitHostStepMarker(absl::string_view event_name)37 inline bool IsExplicitHostStepMarker(absl::string_view event_name) {
38   return (absl::StartsWith(event_name, "train") ||
39           absl::StartsWith(event_name, "test") ||
40           absl::StartsWith(event_name, "TraceContext")) &&
41          !absl::StrContains(event_name, "/");
42 }
43 
44 // Returns true if the given event_name should be considered as real computation
45 // on CPU.
IsRealCpuCompute(absl::string_view event_name)46 inline bool IsRealCpuCompute(absl::string_view event_name) {
47   bool not_real = absl::StartsWith(event_name, "EagerExecute") ||
48                   absl::StartsWith(event_name, "EagerLocalExecute") ||
49                   absl::StartsWith(event_name, "EagerKernelExecute") ||
50                   absl::StartsWith(event_name, "FunctionRun") ||
51                   IsExplicitHostStepMarker(event_name);
52   return !not_real;
53 }
54 
ParseNumBytesFromMemcpyDetail(absl::string_view memcpy_detail)55 uint64 ParseNumBytesFromMemcpyDetail(absl::string_view memcpy_detail) {
56   const std::vector<absl::string_view> params =
57       absl::StrSplit(memcpy_detail, absl::ByAnyChar(":\n"));
58 
59   // Processes value pairs.
60   for (uint32 ii = 0; ii < params.size(); ii += 2) {
61     if (params[ii] != "num_bytes") continue;
62     uint64 value = 0;
63     if (absl::SimpleAtoi(params[ii + 1], &value)) return value;
64     break;
65   }
66   return 0ULL;
67 }
68 
69 }  // namespace
70 
// Converts host-thread events on `line` into per-step events keyed by group
// id (step number). When `use_device_step_events` is true, host events whose
// group id never appears in `device_step_events` are dropped, so that only
// steps actually executed on the device remain.
StepEvents ConvertHostThreadsXLineToStepEvents(
    const XLineVisitor& line, bool use_device_step_events,
    const StepEvents& device_step_events) {
  StepEvents result;
  line.ForEachEvent([&](const XEventVisitor& event) {
    int64 correlation_id = -1;
    int64 group_id = -1;
    absl::string_view step_name;
    // Pull out the stats this conversion cares about.
    event.ForEachStat([&](const XStatVisitor& stat) {
      if (!stat.Type().has_value()) return;
      switch (stat.Type().value()) {
        case StatType::kCorrelationId:
          correlation_id = stat.IntValue();
          break;
        case StatType::kGroupId:
          group_id = stat.IntValue();
          break;
        case StatType::kStepName:
          step_name = stat.StrOrRefValue();
          break;
      }
    });
    // Ungrouped events cannot be attributed to a step.
    if (group_id < 0) return;
    // Skip CPU events whose step number never appears on the device: they do
    // not correspond to any step executed there.
    if (use_device_step_events && device_step_events.count(group_id) == 0) {
      return;
    }
    const auto timespan = event.GetTimespan();
    if (IsExplicitHostStepMarker(event.Name())) {
      result[group_id].AddMarker(StepMarker(
          StepMarkerType::kExplicitHostStepMarker, event.Name(), timespan));
      return;
    }
    if (!step_name.empty()) {
      // Grouping adds a step_name stat to implicit host step markers.
      result[group_id].AddMarker(StepMarker(
          StepMarkerType::kImplicitHostStepMarker, event.Name(), timespan));
      return;
    }
    if (IsRealCpuCompute(event.Name())) {
      result[group_id].AddEvent(EventTypeSpan(
          ClassifyCpuEvent(event.Name(), correlation_id,
                           use_device_step_events),
          timespan));
    }
  });
  return result;
}
119 
// Converts every host-thread line of `host_trace` into step events and merges
// the per-line results into a single StepEvents map.
StepEvents ConvertHostThreadsXPlaneToStepEvents(
    const XPlane& host_trace, bool use_device_step_events,
    const StepEvents& device_step_events) {
  StepEvents combined;
  XPlaneVisitor plane = CreateTfXPlaneVisitor(&host_trace);
  plane.ForEachLine([&](const XLineVisitor& line) {
    StepEvents line_events = ConvertHostThreadsXLineToStepEvents(
        line, use_device_step_events, device_step_events);
    CombineStepEvents(line_events, &combined);
  });
  return combined;
}
132 
ConvertDeviceStepInfoToStepMarkers(const XLineVisitor & line)133 StepEvents ConvertDeviceStepInfoToStepMarkers(const XLineVisitor& line) {
134   StepEvents result;
135   line.ForEachEvent([&](const XEventVisitor& event) {
136     if (absl::optional<XStatVisitor> stat = event.GetStat(StatType::kGroupId)) {
137       result[stat->IntValue()].AddMarker(
138           StepMarker(StepMarkerType::kDeviceStepMarker, event.Name(),
139                      event.GetTimespan()));
140     }
141   });
142   return result;
143 }
144 
ConvertDeviceTraceXLineToStepEvents(const uint64 device_id,const XLineVisitor & line)145 StepEvents ConvertDeviceTraceXLineToStepEvents(const uint64 device_id,
146                                                const XLineVisitor& line) {
147   StepEvents result;
148   line.ForEachEvent([&](const XEventVisitor& event) {
149     int64 correlation_id = -1;
150     int64 group_id = -1;
151     absl::string_view tensor_shapes;
152     absl::string_view memcpy_details;
153     event.ForEachStat([&](const XStatVisitor& stat) {
154       if (!stat.Type().has_value()) return;
155       switch (stat.Type().value()) {
156         case StatType::kCorrelationId:
157           correlation_id = stat.IntValue();
158           break;
159         case StatType::kGroupId:
160           group_id = stat.IntValue();
161           break;
162         case StatType::kTensorShapes:
163           tensor_shapes = stat.StrOrRefValue();
164           break;
165         case StatType::kMemcpyDetails:
166           memcpy_details = stat.StrOrRefValue();
167           break;
168       }
169     });
170 
171     if (correlation_id >= 0 && group_id >= 0) {
172       EventType event_type = ClassifyGpuEvent(event.Name(), tensor_shapes);
173       EventTypeSpan event_type_span(event_type, event.GetTimespan());
174       result[group_id].AddEvent(event_type_span);
175       switch (event_type) {
176         case DEVICE_COLLECTIVES: {
177           AllReduceInfo collective_ops;
178           collective_ops.set_name(string(event.Name()));
179           collective_ops.set_start_time_ps(event.TimestampPs());
180           collective_ops.set_end_time_ps(event.EndOffsetPs());
181           // TODO(jiesun): figure out how to get size info etc.
182           result[group_id].AddCollectiveOpEvent(device_id, collective_ops);
183           break;
184         }
185         case HOST_TO_DEVICE:
186         case DEVICE_TO_DEVICE:
187         case DEVICE_TO_HOST: {
188           // TODO(jiesun): not all memcpy events are grouped, figure out a
189           // better way to attribute them to steps.
190           uint64 bytes_transferred =
191               ParseNumBytesFromMemcpyDetail(memcpy_details);
192           result[group_id].AddDeviceMemoryTransferEvent(
193               event_type, event.GetTimespan(), bytes_transferred);
194           break;
195         }
196         default:
197           return;
198       }
199     }
200   });
201   return result;
202 }
203 
ConvertDeviceTraceXPlaneToStepEvents(const XPlane & device_trace)204 StepEvents ConvertDeviceTraceXPlaneToStepEvents(const XPlane& device_trace) {
205   StepEvents result;
206   XPlaneVisitor plane = CreateTfXPlaneVisitor(&device_trace);
207   plane.ForEachLine([&](const XLineVisitor& line) {
208     int64 line_id = line.Id();
209     if (line_id == kThreadIdStepInfo) {
210       CombineStepEvents(ConvertDeviceStepInfoToStepMarkers(line), &result);
211     } else if (IsDerivedThreadId(line_id)) {
212       return;
213     } else {
214       CombineStepEvents(ConvertDeviceTraceXLineToStepEvents(plane.Id(), line),
215                         &result);
216     }
217   });
218   return result;
219 }
220 
221 }  // namespace profiler
222 }  // namespace tensorflow
223