/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_
#define INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_

#include <array>
#include <atomic>
#include <functional>
#include <string>

#include "perfetto/base/logging.h"
#include "perfetto/base/thread_utils.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/metatrace_events.h"
#include "perfetto/ext/base/utils.h"

// A facility to trace execution of the perfetto codebase itself.
// The meta-tracing framework is organized into three layers:
//
// 1. A static ring-buffer in base/ (this file) that supports concurrent writes
//    and a single reader.
//    The responsibility of this layer is to store events and counters as
//    efficiently as possible without re-entering any tracing code.
//    It is a ring-buffer over a statically allocated POD array.
//    This layer does NOT deal with serializing the meta-trace buffer.
//    It posts a task when the buffer is half full and expects something
//    outside of base/ to drain the ring-buffer and serialize it, eventually
//    writing it into the trace itself, before it gets 100% full.
//
// 2. A class in tracing/core which takes care of serializing the meta-trace
//    buffer into the trace using a TraceWriter. See metatrace_writer.h.
//
// 3. A data source in traced_probes that, when enabled via the trace config,
//    injects metatrace events into the trace. See metatrace_data_source.h.
//
// The available events and tags are defined in metatrace_events.h.
namespace perfetto {

namespace base {
class TaskRunner;
}  // namespace base

namespace metatrace {

// Meta-tracing is organized in "tags" that can be selectively enabled. This is
// to enable meta-tracing only for one sub-system. This word has one "enabled"
// bit for each tag. 0 -> meta-tracing off.
extern std::atomic<uint32_t> g_enabled_tags;

// Time of the Enable() call. Used as a reference for keeping delta timestamps
// in Record.
extern std::atomic<uint64_t> g_enabled_timestamp;

// Enables meta-tracing for one or more tags. Once enabled, any further
// Enable() calls are discarded (returning false) until Disable() is called.
// |read_task| is a closure that will be enqueued on |task_runner| when the
// meta-tracing ring buffer is half full. The task is expected to read the ring
// buffer using RingBuffer::GetReadIterator() and serialize the contents onto a
// file or into the trace itself.
// Must be called on the |task_runner| passed.
// |task_runner| must have static lifetime.
bool Enable(std::function<void()> read_task, base::TaskRunner*, uint32_t tags);

// Disables meta-tracing.
// Must be called on the same |task_runner| as Enable().
void Disable();
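
// Example (illustrative sketch, not part of the API surface; assumes a tag
// such as TAG_ANY from metatrace_events.h and a |task_runner| with static
// lifetime, and must run on that task runner):
//
//   metatrace::Enable(
//       [] { /* drain via RingBuffer::GetReadIterator() and serialize */ },
//       task_runner, metatrace::TAG_ANY);
//   ...
//   metatrace::Disable();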

inline uint64_t TraceTimeNowNs() {
  return static_cast<uint64_t>(base::GetBootTimeNs().count());
}

// Returns a relaxed view of whether metatracing is enabled for the given tag.
// Useful for skipping unnecessary argument computation if metatracing is off.
inline bool IsEnabled(uint32_t tag) {
  auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
  return PERFETTO_UNLIKELY((enabled_tags & tag) != 0);
}
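
// Example (illustrative; the tag, counter ID and helper below are placeholder
// names for entries the caller defines in metatrace_events.h):
//   if (metatrace::IsEnabled(metatrace::TAG_FTRACE))
//     PERFETTO_METATRACE_COUNTER(TAG_FTRACE, COUNTER_QUEUE_LEN,
//                                ComputeExpensiveValue());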

// Holds the data for a metatrace event or counter.
struct Record {
  static constexpr uint16_t kTypeMask = 0x8000;
  static constexpr uint16_t kTypeCounter = 0x8000;
  static constexpr uint16_t kTypeEvent = 0;

  uint64_t timestamp_ns() const {
    auto base_ns = g_enabled_timestamp.load(std::memory_order_relaxed);
    PERFETTO_DCHECK(base_ns);
    return base_ns + ((static_cast<uint64_t>(timestamp_ns_high) << 32) |
                      timestamp_ns_low);
  }

  void set_timestamp(uint64_t ts) {
    auto t_start = g_enabled_timestamp.load(std::memory_order_relaxed);
    uint64_t diff = ts - t_start;
    PERFETTO_DCHECK(diff < (1ull << 48));
    timestamp_ns_low = static_cast<uint32_t>(diff);
    timestamp_ns_high = static_cast<uint16_t>(diff >> 32);
  }

  // We can't just memset() this class because on MSVC std::atomic<> is not
  // trivially constructible anymore. Also std::atomic<> has a deleted copy
  // constructor, so we can't just do "*this = Record()" either.
  // See http://bit.ly/339Jlzd .
  void clear() {
    this->~Record();
    new (this) Record();
  }

  // This field holds the type (counter vs event) in the MSB and the event ID
  // (as defined in metatrace_events.h) in the lowest 15 bits. It is also used
  // as a linearization point: it is always written after all the other fields
  // with a release-store, so that the reader can determine whether it can
  // safely process the other event fields after a load-acquire.
  std::atomic<uint16_t> type_and_id{};

  // The timestamp is stored as a 48-bit value diffed against
  // g_enabled_timestamp. This gives us 78 hours from Enable().
  uint16_t timestamp_ns_high = 0;
  uint32_t timestamp_ns_low = 0;

  uint32_t thread_id = 0;

  union {
    // Only one of the two members can be zero-initialized; clang complains
    // about "initializing multiple members of union" otherwise.
    uint32_t duration_ns = 0;  // If type == event.
    int32_t counter_value;     // If type == counter.
  };
};
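
// Reading a record back (sketch): after a load-acquire of |type_and_id|, a
// zero value means the record is not fully written yet; otherwise the MSB
// selects the type and the low 15 bits carry the ID:
//   uint16_t tid = rec.type_and_id.load(std::memory_order_acquire);
//   if (tid != 0) {
//     bool is_counter = (tid & Record::kTypeMask) == Record::kTypeCounter;
//     uint16_t id = static_cast<uint16_t>(tid & ~Record::kTypeMask);
//   }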

// Holds the meta-tracing data in a statically allocated array.
// This class uses static storage (as opposed to being a singleton) to:
// - Have the guarantee of always-valid storage, so that meta-tracing can be
//   safely used in any part of the codebase, including base/ itself.
// - Avoid the barriers that thread-safe static locals would require.
class RingBuffer {
 public:
  static constexpr size_t kCapacity = 4096;  // 4096 * 16 bytes = 64K.

  // This iterator is not idempotent and will bump the read index in the buffer
  // at the end of the reads. There can be only one reader at any time.
  // Usage: for (auto it = RingBuffer::GetReadIterator(); it; ++it) { it->... }
  class ReadIterator {
   public:
    ReadIterator(ReadIterator&& other) {
      PERFETTO_DCHECK(other.valid_);
      cur_ = other.cur_;
      end_ = other.end_;
      valid_ = other.valid_;
      other.valid_ = false;
    }

    ~ReadIterator() {
      if (!valid_)
        return;
      PERFETTO_DCHECK(cur_ >= RingBuffer::rd_index_);
      PERFETTO_DCHECK(cur_ <= RingBuffer::wr_index_);
      RingBuffer::rd_index_.store(cur_, std::memory_order_release);
    }

    explicit operator bool() const { return cur_ < end_; }
    const Record* operator->() const { return RingBuffer::At(cur_); }
    const Record& operator*() const { return *operator->(); }

    // This is for ++it. it++ is deliberately not supported.
    ReadIterator& operator++() {
      PERFETTO_DCHECK(cur_ < end_);
      // Once a record has been read, mark it as free by clearing its
      // type_and_id, so that if we encounter it in another read iteration
      // while it is being written we know it's not fully written yet.
      // The memory_order_relaxed below is enough because:
      // - The reader is single-threaded and doesn't re-read the same records.
      // - Before starting a read batch, the reader has an acquire barrier on
      //   |rd_index_|.
      // - After terminating a read batch, the ~ReadIterator dtor updates the
      //   |rd_index_| with a release-store.
      // - Reader and writer are typically kCapacity/2 apart. So unless an
      //   overrun happens, a writer won't reuse a newly released record any
      //   time soon. If an overrun happens, everything is busted regardless.
      At(cur_)->type_and_id.store(0, std::memory_order_relaxed);
      ++cur_;
      return *this;
    }

   private:
    friend class RingBuffer;
    ReadIterator(uint64_t begin, uint64_t end)
        : cur_(begin), end_(end), valid_(true) {}
    ReadIterator& operator=(const ReadIterator&) = delete;
    ReadIterator(const ReadIterator&) = delete;

    uint64_t cur_;
    uint64_t end_;
    bool valid_;
  };

  static Record* At(uint64_t index) {
    // Doesn't really have to be pow2, but if it isn't the compiler will emit
    // arithmetic operations to compute the modulo instead of a bitwise AND.
    static_assert(!(kCapacity & (kCapacity - 1)), "kCapacity must be pow2");
    PERFETTO_DCHECK(index >= rd_index_);
    PERFETTO_DCHECK(index <= wr_index_);
    return &records_[index % kCapacity];
  }

  // Must be called on the same task runner passed to Enable().
  static ReadIterator GetReadIterator() {
    PERFETTO_DCHECK(RingBuffer::IsOnValidTaskRunner());
    return ReadIterator(rd_index_.load(std::memory_order_acquire),
                        wr_index_.load(std::memory_order_acquire));
  }

  static Record* AppendNewRecord();
  static void Reset();

  static bool has_overruns() {
    return has_overruns_.load(std::memory_order_acquire);
  }

  // Can temporarily return a value >= kCapacity but is eventually consistent.
  // This would happen in case of overruns until threads hit the --wr_index_
  // in AppendNewRecord().
  static uint64_t GetSizeForTesting() {
    auto wr_index = wr_index_.load(std::memory_order_relaxed);
    auto rd_index = rd_index_.load(std::memory_order_relaxed);
    PERFETTO_DCHECK(wr_index >= rd_index);
    return wr_index - rd_index;
  }

 private:
  friend class ReadIterator;

  // Returns true if the caller is on the task runner passed to Enable().
  // Used only for DCHECKs.
  static bool IsOnValidTaskRunner();

  static std::array<Record, kCapacity> records_;
  static std::atomic<bool> read_task_queued_;
  static std::atomic<uint64_t> wr_index_;
  static std::atomic<uint64_t> rd_index_;
  static std::atomic<bool> has_overruns_;
  static Record bankruptcy_record_;  // Used in case of overruns.
};
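
// A |read_task| passed to Enable() can drain the buffer roughly as follows
// (sketch; the serialization step is left to the caller, see
// metatrace_writer.h):
//   for (auto it = RingBuffer::GetReadIterator(); it; ++it) {
//     if (it->type_and_id.load(std::memory_order_acquire) == 0)
//       break;  // Hit a record that is not fully written yet.
//     // ... serialize *it into the trace or a file ...
//   }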

inline void TraceCounter(uint32_t tag, uint16_t id, int32_t value) {
  // memory_order_relaxed is okay because the storage has static lifetime.
  // It is safe to accidentally log an event soon after disabling.
  auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
  if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
    return;
  Record* record = RingBuffer::AppendNewRecord();
  record->thread_id = static_cast<uint32_t>(base::GetThreadId());
  record->set_timestamp(TraceTimeNowNs());
  record->counter_value = value;
  record->type_and_id.store(Record::kTypeCounter | id,
                            std::memory_order_release);
}

class ScopedEvent {
 public:
  ScopedEvent(uint32_t tag, uint16_t event_id) {
    auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
    if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
      return;
    event_id_ = event_id;
    record_ = RingBuffer::AppendNewRecord();
    record_->thread_id = static_cast<uint32_t>(base::GetThreadId());
    record_->set_timestamp(TraceTimeNowNs());
  }

  ~ScopedEvent() {
    if (PERFETTO_LIKELY(!record_))
      return;
    auto now = TraceTimeNowNs();
    record_->duration_ns = static_cast<uint32_t>(now - record_->timestamp_ns());
    record_->type_and_id.store(Record::kTypeEvent | event_id_,
                               std::memory_order_release);
  }

 private:
  Record* record_ = nullptr;
  uint16_t event_id_ = 0;
  ScopedEvent(const ScopedEvent&) = delete;
  ScopedEvent& operator=(const ScopedEvent&) = delete;
};

// Boilerplate to derive a unique variable name for the event.
#define PERFETTO_METATRACE_UID2(a, b) a##b
#define PERFETTO_METATRACE_UID(x) PERFETTO_METATRACE_UID2(metatrace_, x)

#define PERFETTO_METATRACE_SCOPED(TAG, ID)                                \
  ::perfetto::metatrace::ScopedEvent PERFETTO_METATRACE_UID(__COUNTER__)( \
      ::perfetto::metatrace::TAG, ::perfetto::metatrace::ID)

#define PERFETTO_METATRACE_COUNTER(TAG, ID, VALUE)                \
  ::perfetto::metatrace::TraceCounter(::perfetto::metatrace::TAG, \
                                      ::perfetto::metatrace::ID,  \
                                      static_cast<int32_t>(VALUE))
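
// Example usage (illustrative; TAG_ANY, EVT_DO_WORK and CTR_QUEUE_LEN stand in
// for real entries from metatrace_events.h):
//   void DoWork() {
//     PERFETTO_METATRACE_SCOPED(TAG_ANY, EVT_DO_WORK);  // Times this scope.
//     PERFETTO_METATRACE_COUNTER(TAG_ANY, CTR_QUEUE_LEN, queue.size());
//     ...
//   }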

}  // namespace metatrace
}  // namespace perfetto

#endif  // INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_