1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
16 #define TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
17 
18 #include <string>
19 
20 #include "absl/strings/string_view.h"
21 #include "absl/types/optional.h"
22 #include "tensorflow/core/platform/env.h"
23 #include "tensorflow/core/platform/logging.h"
24 #include "tensorflow/core/platform/types.h"
25 #include "tensorflow/core/profiler/internal/traceme_recorder.h"
26 
27 namespace tensorflow {
28 namespace profiler {
29 
30 // This is specifically used in xprof_bridge for instrumenting Tensorflow ops.
31 // Takes input as whether a TF op is expensive or not and returns the TraceMe
32 // level to be assigned to trace that particular op. Assigns level 2 for
33 // expensive ops (these are high-level details and shown by default in xprof
34 // UI). Assigns level 3 for cheap ops (low-level details not shown by default).
GetTFTraceMeLevel(bool is_expensive)35 inline int GetTFTraceMeLevel(bool is_expensive) { return is_expensive ? 2 : 3; }
36 
37 // This class permits user-specified (CPU) tracing activities. A trace activity
38 // is started when an object of this class is created and stopped when the
39 // object is destroyed.
40 //
41 // CPU tracing can be useful when trying to understand what parts of GPU
42 // computation (e.g., kernels and memcpy) correspond to higher level activities
43 // in the overall program. For instance, a collection of kernels maybe
44 // performing one "step" of a program that is better visualized together than
45 // interspersed with kernels from other "steps". Therefore, a TraceMe object
46 // can be created at each "step".
47 //
48 // Two APIs are provided:
49 //   (1) Scoped object: a TraceMe object starts tracing on construction, and
50 //       stops tracing when it goes out of scope.
51 //          {
52 //            TraceMe trace("step");
53 //            ... do some work ...
54 //          }
55 //       TraceMe objects can be members of a class, or allocated on the heap.
56 //   (2) Static methods: ActivityStart and ActivityEnd may be called in pairs.
57 //          auto id = ActivityStart("step");
58 //          ... do some work ...
59 //          ActivityEnd(id);
60 class TraceMe {
61  public:
62   // Constructor that traces a user-defined activity labeled with activity_name
63   // in the UI. Level defines the trace priority, used for filtering TraceMe
64   // events. By default, traces with TraceMe level <= 2 are recorded. Levels:
65   // - Must be a positive integer.
66   // - Level 1 is the default and used only for user instrumentation.
67   // - Level 2 is used by xprof for instrumenting high level program execution
68   //   details (expensive TF ops, XLA ops, etc).
69   // - Level 3 is also used by xprof to instrument more verbose (low-level)
70   //   program execution details (cheap TF ops, etc).
71   // Users are welcome to use level >= 2 in their code, if they wish to filter
72   // out their host traces based on verbosity.
73   explicit TraceMe(absl::string_view activity_name, int level = 1) {
74     DCHECK_GE(level, 1);
75     if (TraceMeRecorder::Active(level)) {
76       new (&no_init_.name) string(activity_name);
77       start_time_ = Env::Default()->NowNanos();
78     } else {
79       start_time_ = kUntracedActivity;
80     }
81   }
82 
83   // string&& constructor to prevent an unnecessary string copy, e.g. when a
84   // TraceMe is constructed based on the result of a StrCat operation.
85   // Note: We can't take the string by value because a) it would make the
86   // overloads ambiguous, and b) we want lvalue strings to use the string_view
87   // constructor so we avoid copying them when tracing is disabled.
88   explicit TraceMe(string &&activity_name, int level = 1) {
89     DCHECK_GE(level, 1);
90     if (TraceMeRecorder::Active(level)) {
91       new (&no_init_.name) string(std::move(activity_name));
92       start_time_ = Env::Default()->NowNanos();
93     } else {
94       start_time_ = kUntracedActivity;
95     }
96   }
97 
98   // Do not allow passing strings by reference or value since the caller
99   // may unintentionally maintain ownership of the activity_name.
100   // Explicitly std::move the activity_name or wrap it in a string_view if
101   // you really wish to maintain ownership.
102   explicit TraceMe(const string &activity_name, int level = 1) = delete;
103 
104   // This overload is necessary to make TraceMe's with string literals work.
105   // Otherwise, the string&& and the string_view constructor would be equally
106   // good overload candidates.
107   explicit TraceMe(const char *raw, int level = 1)
TraceMe(absl::string_view (raw),level)108       : TraceMe(absl::string_view(raw), level) {}
109 
110   // This overload only generates the activity name if tracing is enabled.
111   // Useful for avoiding things like string concatenation when tracing is
112   // disabled. The |name_generator| may be a lambda or functor that returns a
113   // type that the string() constructor can take.
114   // name_generator is templated, rather than a std::function to avoid
115   // allocations std::function might make even if never called.
116   // Usage: xprof::TraceMe([&]{ return StrCat(prefix, ":", postfix); });
117   template <typename NameGeneratorT>
118   explicit TraceMe(NameGeneratorT name_generator, int level = 1) {
119     DCHECK_GE(level, 1);
120     if (TraceMeRecorder::Active(level)) {
121       new (&no_init_.name) string(name_generator());
122       start_time_ = Env::Default()->NowNanos();
123     } else {
124       start_time_ = kUntracedActivity;
125     }
126   }
127 
~TraceMe()128   ~TraceMe() {
129     // We do not need to check the trace level again here.
130     // - If tracing wasn't active to start with, we have kUntracedActivity.
131     // - If tracing was active and was stopped, we have
132     //   TraceMeRecorder::Active().
133     // - If tracing was active and was restarted at a lower level, we may
134     //   spuriously record the event. This is extremely rare, and acceptable as
135     //   event will be discarded when its start timestamp fall outside of the
136     //   start/stop session timestamp (recorded in XprofResponse).
137     if (start_time_ != kUntracedActivity) {
138       if (TraceMeRecorder::Active()) {
139         TraceMeRecorder::Record({kCompleteActivity, std::move(no_init_.name),
140                                  start_time_, Env::Default()->NowNanos()});
141       }
142       no_init_.name.~string();
143     }
144   }
145 
146   // TraceMe is not movable or copyable.
147   TraceMe(const TraceMe &) = delete;
148   TraceMe &operator=(const TraceMe &) = delete;
149 
150   // Static API, for use when scoped objects are inconvenient.
151 
152   // Record the start time of an activity.
153   // Returns the activity ID, which is used to stop the activity.
154   static uint64 ActivityStart(absl::string_view name, int level = 1) {
155     return TraceMeRecorder::Active(level) ? ActivityStartImpl(name)
156                                           : kUntracedActivity;
157   }
158 
159   // Record the end time of an activity started by ActivityStart().
ActivityEnd(uint64 activity_id)160   static void ActivityEnd(uint64 activity_id) {
161     // We don't check the level again (see ~TraceMe()).
162     if (activity_id != kUntracedActivity) {
163       if (TraceMeRecorder::Active()) {
164         ActivityEndImpl(activity_id);
165       }
166     }
167   }
168 
169  private:
170   // Activity ID or start time used when tracing is disabled.
171   constexpr static uint64 kUntracedActivity = 0;
172   // Activity ID used as a placeholder when both start and end are present.
173   constexpr static uint64 kCompleteActivity = 1;
174 
175   static uint64 ActivityStartImpl(absl::string_view activity_name);
176   static void ActivityEndImpl(uint64 activity_id);
177 
178   // Wrap the name into a union so that we can avoid the cost of string
179   // initialization when tracing is disabled.
180   union NoInit {
NoInit()181     NoInit() {}
~NoInit()182     ~NoInit() {}
183     string name;
184   } no_init_;
185 
186   uint64 start_time_;
187 };
188 
189 }  // namespace profiler
190 }  // namespace tensorflow
191 
192 #endif  // TENSORFLOW_CORE_PROFILER_LIB_TRACEME_H_
193