1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
16 #define TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
17 
18 #include <new>
19 #include <string>
20 #include <utility>
21 
22 #include "absl/strings/string_view.h"
23 #include "tensorflow/core/platform/logging.h"
24 #include "tensorflow/core/platform/macros.h"
25 #include "tensorflow/core/platform/platform.h"
26 #include "tensorflow/core/platform/types.h"
27 #include "tensorflow/core/profiler/lib/traceme_encode.h"  // IWYU pragma: export
28 
29 #if !defined(IS_MOBILE_PLATFORM)
30 #include "tensorflow/core/profiler/internal/cpu/traceme_recorder.h"
31 #include "tensorflow/core/profiler/utils/time_utils.h"
32 #endif
33 
34 namespace tensorflow {
35 namespace profiler {
36 
37 // Predefined levels:
38 // - Level 1 (kCritical) is the default and used only for user instrumentation.
39 // - Level 2 (kInfo) is used by profiler for instrumenting high level program
40 //   execution details (expensive TF ops, XLA ops, etc).
41 // - Level 3 (kVerbose) is also used by profiler to instrument more verbose
42 //   (low-level) program execution details (cheap TF ops, etc).
43 enum TraceMeLevel {
44   kCritical = 1,
45   kInfo = 2,
46   kVerbose = 3,
47 };
48 
49 // This is specifically used for instrumenting Tensorflow ops.
50 // Takes input as whether a TF op is expensive or not and returns the TraceMe
51 // level to be assigned to trace that particular op. Assigns level 2 for
52 // expensive ops (these are high-level details and shown by default in profiler
53 // UI). Assigns level 3 for cheap ops (low-level details not shown by default).
GetTFTraceMeLevel(bool is_expensive)54 inline int GetTFTraceMeLevel(bool is_expensive) {
55   return is_expensive ? kInfo : kVerbose;
56 }
57 
58 // This class permits user-specified (CPU) tracing activities. A trace activity
59 // is started when an object of this class is created and stopped when the
60 // object is destroyed.
61 //
62 // CPU tracing can be useful when trying to understand what parts of GPU
63 // computation (e.g., kernels and memcpy) correspond to higher level activities
64 // in the overall program. For instance, a collection of kernels maybe
65 // performing one "step" of a program that is better visualized together than
66 // interspersed with kernels from other "steps". Therefore, a TraceMe object
67 // can be created at each "step".
68 //
69 // Two APIs are provided:
70 //   (1) Scoped object: a TraceMe object starts tracing on construction, and
71 //       stops tracing when it goes out of scope.
72 //          {
73 //            TraceMe trace("step");
74 //            ... do some work ...
75 //          }
76 //       TraceMe objects can be members of a class, or allocated on the heap.
77 //   (2) Static methods: ActivityStart and ActivityEnd may be called in pairs.
78 //          auto id = ActivityStart("step");
79 //          ... do some work ...
80 //          ActivityEnd(id);
81 //       The two static methods should be called within the same thread.
82 class TraceMe {
83  public:
84   // Constructor that traces a user-defined activity labeled with name
85   // in the UI. Level defines the trace priority, used for filtering TraceMe
86   // events. By default, traces with TraceMe level <= 2 are recorded. Levels:
87   // - Must be a positive integer.
88   // - Can be a value in enum TraceMeLevel.
89   // Users are welcome to use level > 3 in their code, if they wish to filter
90   // out their host traces based on verbosity.
91   explicit TraceMe(absl::string_view name, int level = 1) {
92     DCHECK_GE(level, 1);
93 #if !defined(IS_MOBILE_PLATFORM)
94     if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
95       new (&no_init_.name) std::string(name);
96       start_time_ = GetCurrentTimeNanos();
97     }
98 #endif
99   }
100 
101   // Do not allow passing a temporary string as the overhead of generating that
102   // string should only be incurred when tracing is enabled. Wrap the temporary
103   // string generation (e.g., StrCat) in a lambda and use the name_generator
104   // template instead.
105   explicit TraceMe(std::string&& name, int level = 1) = delete;
106 
107   // Do not allow passing strings by reference or value since the caller
108   // may unintentionally maintain ownership of the name.
109   // Explicitly wrap the name in a string_view if you really wish to maintain
110   // ownership of a string already generated for other purposes. For temporary
111   // strings (e.g., result of StrCat) use the name_generator template.
112   explicit TraceMe(const std::string& name, int level = 1) = delete;
113 
114   // This overload is necessary to make TraceMe's with string literals work.
115   // Otherwise, the name_generator template would be used.
116   explicit TraceMe(const char* raw, int level = 1)
TraceMe(absl::string_view (raw),level)117       : TraceMe(absl::string_view(raw), level) {}
118 
119   // This overload only generates the name (and possibly metadata) if tracing is
120   // enabled. Useful for avoiding expensive operations (e.g., string
121   // concatenation) when tracing is disabled.
122   // name_generator may be a lambda or functor that returns a type that the
123   // string() constructor can take, e.g., the result of TraceMeEncode.
124   // name_generator is templated, rather than a std::function to avoid
125   // allocations std::function might make even if never called.
126   // Example Usage:
127   //   TraceMe trace_me([&]() {
128   //     return StrCat("my_trace", id);
129   //   }
130   //   TraceMe op_trace_me([&]() {
131   //     return TraceMeOp(op_name, op_type);
132   //   }
133   //   TraceMe trace_me_with_metadata([&value1]() {
134   //     return TraceMeEncode("my_trace", {{"key1", value1}, {"key2", 42}});
135   //   });
136   template <typename NameGeneratorT>
137   explicit TraceMe(NameGeneratorT name_generator, int level = 1) {
138     DCHECK_GE(level, 1);
139 #if !defined(IS_MOBILE_PLATFORM)
140     if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
141       new (&no_init_.name) std::string(name_generator());
142       start_time_ = GetCurrentTimeNanos();
143     }
144 #endif
145   }
146 
147   // Movable.
TraceMe(TraceMe && other)148   TraceMe(TraceMe&& other) { *this = std::move(other); }
149   TraceMe& operator=(TraceMe&& other) {
150 #if !defined(IS_MOBILE_PLATFORM)
151     if (TF_PREDICT_FALSE(other.start_time_ != kUntracedActivity)) {
152       new (&no_init_.name) std::string(std::move(other.no_init_.name));
153       other.no_init_.name.~string();
154       start_time_ = std::exchange(other.start_time_, kUntracedActivity);
155     }
156 #endif
157     return *this;
158   }
159 
~TraceMe()160   ~TraceMe() { Stop(); }
161 
162   // Stop tracing the activity. Called by the destructor, but exposed to allow
163   // stopping tracing before the object goes out of scope. Only has an effect
164   // the first time it is called.
Stop()165   void Stop() {
166     // We do not need to check the trace level again here.
167     // - If tracing wasn't active to start with, we have kUntracedActivity.
168     // - If tracing was active and was stopped, we have
169     //   TraceMeRecorder::Active().
170     // - If tracing was active and was restarted at a lower level, we may
171     //   spuriously record the event. This is extremely rare, and acceptable as
172     //   event will be discarded when its start timestamp fall outside of the
173     //   start/stop session timestamp.
174 #if !defined(IS_MOBILE_PLATFORM)
175     if (TF_PREDICT_FALSE(start_time_ != kUntracedActivity)) {
176       if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
177         TraceMeRecorder::Record(
178             {std::move(no_init_.name), start_time_, GetCurrentTimeNanos()});
179       }
180       no_init_.name.~string();
181       start_time_ = kUntracedActivity;
182     }
183 #endif
184   }
185 
186   // Appends new_metadata to the TraceMe name passed to the constructor.
187   // metadata_generator may be a lambda or functor that returns a type that the
188   // string() constructor can take, e.g., the result of TraceMeEncode.
189   // metadata_generator is only evaluated when tracing is enabled.
190   // metadata_generator is templated, rather than a std::function to avoid
191   // allocations std::function might make even if never called.
192   // Example Usage:
193   //   trace_me.AppendMetadata([&value1]() {
194   //     return TraceMeEncode({{"key1", value1}, {"key2", 42}});
195   //   });
196   template <typename MetadataGeneratorT>
AppendMetadata(MetadataGeneratorT metadata_generator)197   void AppendMetadata(MetadataGeneratorT metadata_generator) {
198 #if !defined(IS_MOBILE_PLATFORM)
199     if (TF_PREDICT_FALSE(start_time_ != kUntracedActivity)) {
200       if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
201         traceme_internal::AppendMetadata(&no_init_.name, metadata_generator());
202       }
203     }
204 #endif
205   }
206 
207   // Static API, for use when scoped objects are inconvenient.
208 
209   // Record the start time of an activity.
210   // Returns the activity ID, which is used to stop the activity.
211   // Calls `name_generator` to get the name for activity.
212   template <typename NameGeneratorT>
213   static int64 ActivityStart(NameGeneratorT name_generator, int level = 1) {
214 #if !defined(IS_MOBILE_PLATFORM)
215     if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
216       int64 activity_id = TraceMeRecorder::NewActivityId();
217       TraceMeRecorder::Record(
218           {name_generator(), GetCurrentTimeNanos(), -activity_id});
219       return activity_id;
220     }
221 #endif
222     return kUntracedActivity;
223   }
224 
225   // Record the start time of an activity.
226   // Returns the activity ID, which is used to stop the activity.
227   static int64 ActivityStart(absl::string_view name, int level = 1) {
228 #if !defined(IS_MOBILE_PLATFORM)
229     if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
230       int64 activity_id = TraceMeRecorder::NewActivityId();
231       TraceMeRecorder::Record(
232           {std::string(name), GetCurrentTimeNanos(), -activity_id});
233       return activity_id;
234     }
235 #endif
236     return kUntracedActivity;
237   }
238 
239   // Same as ActivityStart above, an overload for "const std::string&"
240   static int64 ActivityStart(const std::string& name, int level = 1) {
241     return ActivityStart(absl::string_view(name), level);
242   }
243 
244   // Same as ActivityStart above, an overload for "const char*"
245   static int64 ActivityStart(const char* name, int level = 1) {
246     return ActivityStart(absl::string_view(name), level);
247   }
248 
249   // Record the end time of an activity started by ActivityStart().
ActivityEnd(int64 activity_id)250   static void ActivityEnd(int64 activity_id) {
251 #if !defined(IS_MOBILE_PLATFORM)
252     // We don't check the level again (see TraceMe::Stop()).
253     if (TF_PREDICT_FALSE(activity_id != kUntracedActivity)) {
254       if (TF_PREDICT_TRUE(TraceMeRecorder::Active())) {
255         TraceMeRecorder::Record(
256             {std::string(), -activity_id, GetCurrentTimeNanos()});
257       }
258     }
259 #endif
260   }
261 
262   // Records the time of an instant activity.
263   template <typename NameGeneratorT>
264   static void InstantActivity(NameGeneratorT name_generator, int level = 1) {
265 #if !defined(IS_MOBILE_PLATFORM)
266     if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) {
267       int64 now = GetCurrentTimeNanos();
268       TraceMeRecorder::Record(
269           {name_generator(), /*start_time=*/now, /*end_time=*/now});
270     }
271 #endif
272   }
273 
274   static bool Active(int level = 1) {
275 #if !defined(IS_MOBILE_PLATFORM)
276     return TraceMeRecorder::Active(level);
277 #else
278     return false;
279 #endif
280   }
281 
NewActivityId()282   static int64 NewActivityId() {
283 #if !defined(IS_MOBILE_PLATFORM)
284     return TraceMeRecorder::NewActivityId();
285 #else
286     return 0;
287 #endif
288   }
289 
290  private:
291   // Start time used when tracing is disabled.
292   constexpr static int64 kUntracedActivity = 0;
293 
294   TF_DISALLOW_COPY_AND_ASSIGN(TraceMe);
295 
296   // Wrap the name into a union so that we can avoid the cost of string
297   // initialization when tracing is disabled.
298   union NoInit {
NoInit()299     NoInit() {}
~NoInit()300     ~NoInit() {}
301     std::string name;
302   } no_init_;
303 
304   int64 start_time_ = kUntracedActivity;
305 };
306 
307 // Whether OpKernel::TraceString will populate additional information for
308 // profiler, such as tensor shapes.
TfOpDetailsEnabled()309 inline bool TfOpDetailsEnabled() {
310   return TraceMe::Active(TraceMeLevel::kVerbose);
311 }
312 
313 }  // namespace profiler
314 }  // namespace tensorflow
315 
316 #endif  // TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
317