1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/profiler/utils/xplane_schema.h"
17 
18 #include "absl/container/flat_hash_map.h"
19 #include "absl/strings/string_view.h"
20 #include "absl/types/optional.h"
21 #include "tensorflow/core/lib/gtl/map_util.h"
22 #include "tensorflow/core/platform/logging.h"
23 #include "tensorflow/core/platform/types.h"
24 #include "tensorflow/core/profiler/utils/tf_op_utils.h"
25 
26 namespace tensorflow {
27 namespace profiler {
28 
29 const absl::string_view kHostThreadsPlaneName = "/host:CPU";
30 const absl::string_view kGpuPlanePrefix = "/device:GPU:";
31 const absl::string_view kTpuPlanePrefix = "/device:TPU:";
32 const absl::string_view kCuptiDriverApiPlaneName = "/host:CUPTI";
33 const absl::string_view kMetadataPlaneName = "/host:metadata";
34 const absl::string_view kTFStreamzPlaneName = "/host:tfstreamz";
35 const absl::string_view kPythonTracerPlaneName = "/host:python-tracer";
36 
37 const absl::string_view kStepLineName = "Steps";
38 const absl::string_view kTensorFlowNameScopeLineName = "TensorFlow Name Scope";
39 const absl::string_view kTensorFlowOpLineName = "TensorFlow Ops";
40 const absl::string_view kXlaModuleLineName = "XLA Modules";
41 const absl::string_view kXlaOpLineName = "XLA Ops";
42 const absl::string_view kKernelLaunchLineName = "Launch Stats";
43 
44 namespace {
45 
46 constexpr int kNumHostEventTypes =
47     HostEventType::kLastHostEventType - HostEventType::kFirstHostEventType + 1;
48 
49 constexpr int kNumStatTypes =
50     StatType::kLastStatType - StatType::kFirstStatType + 1;
51 
52 using HostEventTypeMap = absl::flat_hash_map<absl::string_view, HostEventType>;
53 using HostEventTypeStrMap =
54     absl::flat_hash_map<HostEventType, absl::string_view>;
55 using StatTypeMap = absl::flat_hash_map<absl::string_view, StatType>;
56 using StatTypeStrMap = absl::flat_hash_map<StatType, absl::string_view>;
57 
GetHostEventTypeMap()58 const HostEventTypeMap& GetHostEventTypeMap() {
59   static auto* host_event_type_map = new HostEventTypeMap({
60       {"UnknownHostEventType", kUnknownHostEventType},
61       {"TraceContext", kTraceContext},
62       {"SessionRun", kSessionRun},
63       {"FunctionRun", kFunctionRun},
64       {"RunGraph", kRunGraph},
65       {"RunGraphDone", kRunGraphDone},
66       {"TfOpRun", kTfOpRun},
67       {"EagerKernelExecute", kEagerKernelExecute},
68       {"ExecutorState::Process", kExecutorStateProcess},
69       {"ExecutorDoneCallback", kExecutorDoneCallback},
70       {"MemoryAllocation", kMemoryAllocation},
71       {"MemoryDeallocation", kMemoryDeallocation},
72       // Performance counter related.
73       {"RemotePerfCounter", kRemotePerf},
74       // tf data captured function events.
75       {"InstantiatedCapturedFunction::Run", kTfDataCapturedFunctionRun},
76       {"InstantiatedCapturedFunction::RunWithBorrowedArgs",
77        kTfDataCapturedFunctionRunWithBorrowedArgs},
78       {"InstantiatedCapturedFunction::RunInstantiated",
79        kTfDataCapturedFunctionRunInstantiated},
80       {"InstantiatedCapturedFunction::RunAsync",
81        kTfDataCapturedFunctionRunAsync},
82       // Functional ops.
83       {"CallOp", kCallOp},
84       {"ParallelForOp", kParallelForOp},
85       {"ForeverOp", kForeverOp},
86       {"NumericalGradientOp-EvalRight", kNumericalGradientOpEvalRight},
87       {"NumericalGradientOp-EvalLeft", kNumericalGradientOpEvalLeft},
88       {"SymbolicGradientOp", kSymbolicGradientOp},
89       {"RemoteCallOp", kRemoteCallOp},
90       {"IfOp", kIfOp},
91       {"CaseOp", kCaseOp},
92       {"WhileOp-EvalCond", kWhileOpEvalCond},
93       {"WhileOp-StartBody", kWhileOpStartBody},
94       {"ForOp", kForOp},
95       {"PartitionedCallOp", kPartitionedCallOp},
96       // tf.data related.
97       {"IteratorGetNextOp::DoCompute", kIteratorGetNextOp},
98       {"IteratorGetNextAsOptionalOp::DoCompute", kIteratorGetNextAsOptionalOp},
99       {"Iterator", kIterator},
100       {"Iterator::Prefetch::Generator", kDeviceInputPipelineSecondIterator},
101       {"PrefetchProduce", kPrefetchProduce},
102       {"PrefetchConsume", kPrefetchConsume},
103       {"ParallelInterleaveProduce", kParallelInterleaveProduce},
104       {"ParallelInterleaveConsume", kParallelInterleaveConsume},
105       {"ParallelInterleaveInitializeInput",
106        kParallelInterleaveInitializedInput},
107       {"ParallelMapProduce", kParallelMapProduce},
108       {"ParallelMapConsume", kParallelMapConsume},
109       {"MapAndBatchProduce", kMapAndBatchProduce},
110       {"MapAndBatchConsume", kMapAndBatchConsume},
111       {"ParseExampleProduce", kParseExampleProduce},
112       {"ParseExampleConsume", kParseExampleConsume},
113       // Batching related.
114       {"BatchingSessionRun", kBatchingSessionRun},
115       {"ProcessBatch", kProcessBatch},
116       {"ConcatInputTensors", kConcatInputTensors},
117       {"MergeInputTensors", kMergeInputTensors},
118       {"ScheduleWithoutSplit", kScheduleWithoutSplit},
119       {"ScheduleWithSplit", kScheduleWithSplit},
120       {"ASBSQueue::Schedule", kASBSQueueSchedule},
121       // JAX related.
122       {"LocalExecutable::ExecuteOnLocalDevices", kExecuteOnLocalDevices},
123       // GPU related.
124       {"KernelLaunch", kKernelLaunch},
125       {"KernelExecute", kKernelExecute},
126   });
127   DCHECK_EQ(host_event_type_map->size(), kNumHostEventTypes);
128   return *host_event_type_map;
129 }
130 
GetStatTypeMap()131 const StatTypeMap& GetStatTypeMap() {
132   static auto* stat_type_map = new StatTypeMap({
133       {"UnknownStatType", kUnknownStatType},
134       // TraceMe arguments.
135       {"id", kStepId},
136       {"parent_step_id", kParentStepId},
137       {"function_step_id", kFunctionStepId},
138       {"device_ordinal", kDeviceOrdinal},
139       {"chip_ordinal", kChipOrdinal},
140       {"node_ordinal", kNodeOrdinal},
141       {"model_id", kModelId},
142       {"queue_addr", kQueueAddr},
143       {"request_id", kRequestId},
144       {"run_id", kRunId},
145       {"graph_type", kGraphType},
146       {"step_num", kStepNum},
147       {"iter_num", kIterNum},
148       {"index_on_host", kIndexOnHost},
149       {"allocator_name", kAllocatorName},
150       {"bytes_reserved", kBytesReserved},
151       {"bytes_allocated", kBytesAllocated},
152       {"bytes_available", kBytesAvailable},
153       {"fragmentation", kFragmentation},
154       {"peak_bytes_in_use", kPeakBytesInUse},
155       {"requested_bytes", kRequestedBytes},
156       {"allocation_bytes", kAllocationBytes},
157       {"addr", kAddress},
158       {"region_type", kRegionType},
159       {"data_type", kDataType},
160       {"shape", kTensorShapes},
161       {"layout", kTensorLayout},
162       {"kpi_name", kKpiName},
163       {"kpi_value", kKpiValue},
164       {"element_id", kElementId},
165       {"parent_id", kParentId},
166       // XPlane semantics related.
167       {"_pt", kProducerType},
168       {"_ct", kConsumerType},
169       {"_p", kProducerId},
170       {"_c", kConsumerId},
171       {"_r", kIsRoot},
172       {"_a", kIsAsync},
173       // Device trace arguments.
174       {"device_id", kDeviceId},
175       {"context_id", kContextId},
176       {"correlation_id", kCorrelationId},
177       {"memcpy_details", kMemcpyDetails},
178       {"memalloc_details", kMemallocDetails},
179       {"MemFree_details", kMemFreeDetails},
180       {"Memset_details", kMemsetDetails},
181       {"MemoryResidency_details", kMemoryResidencyDetails},
182       {"kernel_details", kKernelDetails},
183       {"annotation", kKernelAnnotation},
184       {"nvtx_range", kNVTXRange},
185       {"stream", kStream},
186       // Stats added when processing traces.
187       {"group_id", kGroupId},
188       {"flow", kFlow},
189       {"step_name", kStepName},
190       {"level 0", kLevel0},
191       {"tf_op", kTfOp},
192       {"hlo_op", kHloOp},
193       {"hlo_module", kHloModule},
194       {"equation", kEquation},
195       {"is_eager", kIsEager},
196       {"tf_function_call", kTfFunctionCall},
197       {"tracing_count", kTfFunctionTracingCount},
198       {"flops", kFlops},
199       {"bytes_accessed", kBytesAccessed},
200       {"selected_group_ids", kSelectedGroupIds},
201       // Performance counter related.
202       {"Raw Value", kRawValue},
203       {"Scaled Value", kScaledValue},
204       {"Thread Id", kThreadId},
205       // XLA metadata map related.
206       {"SELF_DURATION_PS", kSelfDurationPs},
207       {"MIN_DURATION_PS", kMinDurationPs},
208       {"Hlo Proto", kHloProto},
209       // Device capability related.
210       {"clock_rate", kDevCapClockRateKHz},
211       {"core_count", kDevCapCoreCount},
212       {"memory_bandwidth", kDevCapMemoryBandwidth},
213       {"memory_size", kDevCapMemorySize},
214       {"compute_cap_major", kDevCapComputeCapMajor},
215       {"compute_cap_minor", kDevCapComputeCapMinor},
216       // Batching related.
217       {"batch_size_after_padding", kBatchSizeAfterPadding},
218       {"padding_amount", kPaddingAmount},
219       {"batching_input_task_size", kBatchingInputTaskSize},
220       // GPU related metrics.
221       {"theoretical_occupancy_pct", kTheoreticalOccupancyPct},
222       {"occupancy_min_grid_size", kOccupancyMinGridSize},
223       {"occupancy_suggested_block_size", kOccupancySuggestedBlockSize},
224   });
225   DCHECK_EQ(stat_type_map->size(), kNumStatTypes);
226   return *stat_type_map;
227 }
228 
GetHostEventTypeStrMap()229 const HostEventTypeStrMap& GetHostEventTypeStrMap() {
230   static auto* host_event_type_str_map = new HostEventTypeStrMap(
231       gtl::ReverseMap<HostEventTypeStrMap>(GetHostEventTypeMap()));
232   return *host_event_type_str_map;
233 }
234 
GetStatTypeStrMap()235 const StatTypeStrMap& GetStatTypeStrMap() {
236   static auto* stat_type_str_map =
237       new StatTypeStrMap(gtl::ReverseMap<StatTypeStrMap>(GetStatTypeMap()));
238   return *stat_type_str_map;
239 }
240 
241 }  // namespace
242 
GetHostEventTypeStr(HostEventType event_type)243 absl::string_view GetHostEventTypeStr(HostEventType event_type) {
244   return GetHostEventTypeStrMap().at(event_type);
245 }
246 
FindHostEventType(absl::string_view event_name)247 absl::optional<int64> FindHostEventType(absl::string_view event_name) {
248   if (auto event_type = gtl::FindOrNull(GetHostEventTypeMap(), event_name)) {
249     return *event_type;
250   }
251   return absl::nullopt;
252 }
253 
FindTfOpEventType(absl::string_view event_name)254 absl::optional<int64> FindTfOpEventType(absl::string_view event_name) {
255   // TF op names.
256   Category category = ParseTfOpFullname(event_name).category;
257   switch (category) {
258     case Category::kTensorFlow:
259       return HostEventType::kTfOpRun;
260     case Category::kTfData:
261       return HostEventType::kIterator;
262     default:
263       return absl::nullopt;
264   }
265 }
266 
GetStatTypeStr(StatType stat_type)267 absl::string_view GetStatTypeStr(StatType stat_type) {
268   return GetStatTypeStrMap().at(stat_type);
269 }
270 
FindStatType(absl::string_view stat_name)271 absl::optional<int64> FindStatType(absl::string_view stat_name) {
272   if (auto stat_type = gtl::FindOrNull(GetStatTypeMap(), stat_name)) {
273     return *stat_type;
274   }
275   return absl::nullopt;
276 }
277 
IsInternalEvent(absl::optional<int64> event_type)278 bool IsInternalEvent(absl::optional<int64> event_type) {
279   // TODO(b/162102421): Introduce a prefix for internal event names.
280   if (!event_type.has_value()) return false;
281   switch (*event_type) {
282     case HostEventType::kMemoryAllocation:
283     case HostEventType::kMemoryDeallocation:
284     case HostEventType::kPrefetchProduce:
285     case HostEventType::kPrefetchConsume:
286     case HostEventType::kParallelInterleaveProduce:
287     case HostEventType::kParallelInterleaveConsume:
288     case HostEventType::kParallelInterleaveInitializedInput:
289     case HostEventType::kParallelMapProduce:
290     case HostEventType::kParallelMapConsume:
291     case HostEventType::kMapAndBatchProduce:
292     case HostEventType::kMapAndBatchConsume:
293     case HostEventType::kParseExampleProduce:
294     case HostEventType::kParseExampleConsume:
295       return true;
296     default:
297       return false;
298   }
299 }
300 
IsInternalStat(absl::optional<int64> stat_type)301 bool IsInternalStat(absl::optional<int64> stat_type) {
302   // TODO(b/162102421): Introduce a prefix for internal stat names.
303   if (!stat_type.has_value()) return false;
304   switch (*stat_type) {
305     case StatType::kKernelDetails:
306     case StatType::kLevel0:
307     case StatType::kProducerType:
308     case StatType::kProducerId:
309     case StatType::kConsumerType:
310     case StatType::kConsumerId:
311     case StatType::kIsRoot:
312     case StatType::kIsAsync:
313     case StatType::kFlops:
314     case StatType::kBytesAccessed:
315       return true;
316     default:
317       return false;
318   }
319 }
320 
321 }  // namespace profiler
322 }  // namespace tensorflow
323