/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_
#define INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_

#include <array>
#include <atomic>
#include <functional>
#include <string>

#include "perfetto/base/logging.h"
#include "perfetto/base/thread_utils.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/metatrace_events.h"
#include "perfetto/ext/base/utils.h"

// A facility to trace execution of the perfetto codebase itself.
// The meta-tracing framework is organized into three layers:
//
// 1. A static ring-buffer in base/ (this file) that supports concurrent writes
//    and a single reader.
//    The responsibility of this layer is to store events and counters as
//    efficiently as possible without re-entering any tracing code.
//    It is effectively a ring buffer in static storage, backed by a POD array.
//    This layer does NOT deal with serializing the meta-trace buffer.
//    It posts a task when it's half full and expects something outside of
//    base/ to drain the ring-buffer and serialize it, eventually writing it
//    into the trace itself, before it gets 100% full.
//
// 2. A class in tracing/core which takes care of serializing the meta-trace
//    buffer into the trace using a TraceWriter. See metatrace_writer.h .
//
// 3. A data source in traced_probes that, when enabled via the trace config,
//    injects metatrace events into the trace. See metatrace_data_source.h .
//
// The available events and tags are defined in metatrace_events.h .

namespace perfetto {

namespace base {
class TaskRunner;
}  // namespace base

namespace metatrace {

// Meta-tracing is organized in "tags" that can be selectively enabled. This
// allows enabling meta-tracing for only one sub-system. This word has one
// "enabled" bit for each tag. 0 -> meta-tracing off.
extern std::atomic<uint32_t> g_enabled_tags;
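// For example, enabling meta-tracing for two sub-systems sets two bits of this
// word (a sketch; the TAG_* constants live in metatrace_events.h):
//
//   g_enabled_tags.store(TAG_FTRACE | TAG_PROC_POLLERS);
//
// In practice this word is written via Enable() / Disable() below, not by hand.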

// Time of the Enable() call. Used as a reference for keeping delta timestamps
// in Record.
extern std::atomic<uint64_t> g_enabled_timestamp;

// Enables meta-tracing for one or more tags. Once enabled, it will discard any
// further Enable() calls and return false until Disable() is called.
// |read_task| is a closure that will be enqueued on |task_runner| when the
// meta-tracing ring buffer is half full. The task is expected to read the ring
// buffer using RingBuffer::GetReadIterator() and serialize the contents onto a
// file or into the trace itself.
// Must be called on the |task_runner| passed.
// |task_runner| must have static lifetime.
bool Enable(std::function<void()> read_task, base::TaskRunner*, uint32_t tags);
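// A minimal wiring sketch (assuming |task_runner| is a base::TaskRunner* with
// static lifetime; TAG_ANY is one of the tags from metatrace_events.h):
//
//   metatrace::Enable(
//       [] {
//         for (auto it = metatrace::RingBuffer::GetReadIterator(); it; ++it) {
//           // Serialize / consume *it here (see metatrace_writer.h).
//         }
//       },
//       task_runner, metatrace::TAG_ANY);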

// Disables meta-tracing.
// Must be called on the same |task_runner| as Enable().
void Disable();

inline uint64_t TraceTimeNowNs() {
  return static_cast<uint64_t>(base::GetBootTimeNs().count());
}

// Returns a relaxed view of whether metatracing is enabled for the given tag.
// Useful for skipping unnecessary argument computation if metatracing is off.
inline bool IsEnabled(uint32_t tag) {
  auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
  return PERFETTO_UNLIKELY((enabled_tags & tag) != 0);
}
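// Example of using IsEnabled() to skip expensive argument computation (a
// sketch; MY_TAG, MY_COUNTER and ComputeExpensiveValue() are hypothetical
// placeholders, not real names from this codebase):
//
//   if (metatrace::IsEnabled(metatrace::MY_TAG)) {
//     PERFETTO_METATRACE_COUNTER(MY_TAG, MY_COUNTER, ComputeExpensiveValue());
//   }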

// Holds the data for a metatrace event or counter.
struct Record {
  static constexpr uint16_t kTypeMask = 0x8000;
  static constexpr uint16_t kTypeCounter = 0x8000;
  static constexpr uint16_t kTypeEvent = 0;

  uint64_t timestamp_ns() const {
    auto base_ns = g_enabled_timestamp.load(std::memory_order_relaxed);
    PERFETTO_DCHECK(base_ns);
    return base_ns + ((static_cast<uint64_t>(timestamp_ns_high) << 32) |
                      timestamp_ns_low);
  }

  void set_timestamp(uint64_t ts) {
    auto t_start = g_enabled_timestamp.load(std::memory_order_relaxed);
    uint64_t diff = ts - t_start;
    PERFETTO_DCHECK(diff < (1ull << 48));
    timestamp_ns_low = static_cast<uint32_t>(diff);
    timestamp_ns_high = static_cast<uint16_t>(diff >> 32);
  }

  // We can't just memset() this class because on MSVC std::atomic<> is no
  // longer trivially constructible. std::atomic<> also has a deleted copy
  // constructor, so we can't just do "*this = Record()" either.
  // See http://bit.ly/339Jlzd .
  void clear() {
    this->~Record();
    new (this) Record();
  }

  // This field holds the type (counter vs event) in the MSB and the event ID
  // (as defined in metatrace_events.h) in the lowest 15 bits. It is also used
  // as a linearization point: it is always written after all the other fields
  // with a release-store, so that the reader can determine whether it can
  // safely process the other event fields after a load-acquire.
  std::atomic<uint16_t> type_and_id{};
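  // A reader-side sketch of the load-acquire described above:
  //
  //   uint16_t t = rec.type_and_id.load(std::memory_order_acquire);
  //   if (t == 0) {
  //     // Record not fully written yet (or already consumed): skip it.
  //   } else if (t & kTypeMask) {
  //     // Counter: the other fields are now safe to read.
  //   } else {
  //     // Event.
  //   }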

  // The timestamp is stored as a 48-bit delta against g_enabled_timestamp.
  // 2^48 ns is roughly 78 hours, so this covers ~78 hours from the Enable()
  // call.
  uint16_t timestamp_ns_high = 0;
  uint32_t timestamp_ns_low = 0;

  uint32_t thread_id = 0;

  union {
    // Only one of the two elements can be zero-initialized, clang complains
    // about "initializing multiple members of union" otherwise.
    uint32_t duration_ns = 0;  // If type == event.
    int32_t counter_value;     // If type == counter.
  };
};
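// Layout sketch of Record (assuming no extra padding; not asserted in this
// header): type_and_id (2 bytes) + timestamp_ns_high (2) + timestamp_ns_low
// (4) + thread_id (4) + union (4) = 16 bytes, which matches the
// "4096 * 16 bytes" sizing of RingBuffer below.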

// Holds the meta-tracing data in a statically allocated array.
// This class uses static storage (as opposed to being a singleton) to:
// - Have the guarantee of always-valid storage, so that meta-tracing can be
//   safely used in any part of the codebase, including base/ itself.
// - Avoid the barriers that thread-safe static locals would require.
class RingBuffer {
 public:
  static constexpr size_t kCapacity = 4096;  // 4096 * 16 bytes = 64K.

  // This iterator is not idempotent and will bump the read index in the buffer
  // at the end of the reads. There can be only one reader at any time.
  // Usage: for (auto it = RingBuffer::GetReadIterator(); it; ++it) { it->... }
  class ReadIterator {
   public:
    ReadIterator(ReadIterator&& other) {
      PERFETTO_DCHECK(other.valid_);
      cur_ = other.cur_;
      end_ = other.end_;
      valid_ = other.valid_;
      other.valid_ = false;
    }

    ~ReadIterator() {
      if (!valid_)
        return;
      PERFETTO_DCHECK(cur_ >= RingBuffer::rd_index_);
      PERFETTO_DCHECK(cur_ <= RingBuffer::wr_index_);
      RingBuffer::rd_index_.store(cur_, std::memory_order_release);
    }

    explicit operator bool() const { return cur_ < end_; }
    const Record* operator->() const { return RingBuffer::At(cur_); }
    const Record& operator*() const { return *operator->(); }

    // This is for ++it. it++ is deliberately not supported.
    ReadIterator& operator++() {
      PERFETTO_DCHECK(cur_ < end_);
      // Once a record has been read, mark it as free by clearing its
      // type_and_id, so that if we encounter it in another read iteration
      // while it is being written we know it's not fully written yet.
      // The memory_order_relaxed below is enough because:
      // - The reader is single-threaded and doesn't re-read the same records.
      // - Before starting a read batch, the reader has an acquire barrier on
      //   |rd_index_|.
      // - After terminating a read batch, the ~ReadIterator dtor updates the
      //   |rd_index_| with a release-store.
      // - Reader and writer are typically kCapacity/2 apart. So unless an
      //   overrun happens, a writer won't reuse a newly released record any
      //   time soon. If an overrun happens, everything is busted regardless.
      At(cur_)->type_and_id.store(0, std::memory_order_relaxed);
      ++cur_;
      return *this;
    }

   private:
    friend class RingBuffer;
    ReadIterator(uint64_t begin, uint64_t end)
        : cur_(begin), end_(end), valid_(true) {}
    ReadIterator& operator=(const ReadIterator&) = delete;
    ReadIterator(const ReadIterator&) = delete;

    uint64_t cur_;
    uint64_t end_;
    bool valid_;
  };

  static Record* At(uint64_t index) {
    // Doesn't really have to be pow2, but if not the compiler will emit
    // arithmetic operations to compute the modulo instead of a bitwise AND.
    static_assert(!(kCapacity & (kCapacity - 1)), "kCapacity must be pow2");
    PERFETTO_DCHECK(index >= rd_index_);
    PERFETTO_DCHECK(index <= wr_index_);
    return &records_[index % kCapacity];
  }

  // Must be called on the same task runner passed to Enable().
  static ReadIterator GetReadIterator() {
    PERFETTO_DCHECK(RingBuffer::IsOnValidTaskRunner());
    return ReadIterator(rd_index_.load(std::memory_order_acquire),
                        wr_index_.load(std::memory_order_acquire));
  }

  static Record* AppendNewRecord();
  static void Reset();

  static bool has_overruns() {
    return has_overruns_.load(std::memory_order_acquire);
  }

  // Can temporarily return a value >= kCapacity but is eventually consistent.
  // This would happen in case of overruns until threads hit the --wr_index_
  // in AppendNewRecord().
  static uint64_t GetSizeForTesting() {
    auto wr_index = wr_index_.load(std::memory_order_relaxed);
    auto rd_index = rd_index_.load(std::memory_order_relaxed);
    PERFETTO_DCHECK(wr_index >= rd_index);
    return wr_index - rd_index;
  }

 private:
  friend class ReadIterator;

  // Returns true if the caller is on the task runner passed to Enable().
  // Used only for DCHECKs.
  static bool IsOnValidTaskRunner();

  static std::array<Record, kCapacity> records_;
  static std::atomic<bool> read_task_queued_;
  static std::atomic<uint64_t> wr_index_;
  static std::atomic<uint64_t> rd_index_;
  static std::atomic<bool> has_overruns_;
  static Record bankruptcy_record_;  // Used in case of overruns.
};

inline void TraceCounter(uint32_t tag, uint16_t id, int32_t value) {
  // memory_order_relaxed is okay because the storage has static lifetime.
  // It is safe to accidentally log an event soon after disabling.
  auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
  if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
    return;
  Record* record = RingBuffer::AppendNewRecord();
  record->thread_id = static_cast<uint32_t>(base::GetThreadId());
  record->set_timestamp(TraceTimeNowNs());
  record->counter_value = value;
  record->type_and_id.store(Record::kTypeCounter | id,
                            std::memory_order_release);
}

class ScopedEvent {
 public:
  ScopedEvent(uint32_t tag, uint16_t event_id) {
    auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
    if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
      return;
    event_id_ = event_id;
    record_ = RingBuffer::AppendNewRecord();
    record_->thread_id = static_cast<uint32_t>(base::GetThreadId());
    record_->set_timestamp(TraceTimeNowNs());
  }

  ~ScopedEvent() {
    if (PERFETTO_LIKELY(!record_))
      return;
    auto now = TraceTimeNowNs();
    record_->duration_ns = static_cast<uint32_t>(now - record_->timestamp_ns());
    record_->type_and_id.store(Record::kTypeEvent | event_id_,
                               std::memory_order_release);
  }

 private:
  Record* record_ = nullptr;
  uint16_t event_id_ = 0;
  ScopedEvent(const ScopedEvent&) = delete;
  ScopedEvent& operator=(const ScopedEvent&) = delete;
};

// Boilerplate to derive a unique variable name for the event.
#define PERFETTO_METATRACE_UID2(a, b) a##b
#define PERFETTO_METATRACE_UID(x) PERFETTO_METATRACE_UID2(metatrace_, x)

#define PERFETTO_METATRACE_SCOPED(TAG, ID)                                \
  ::perfetto::metatrace::ScopedEvent PERFETTO_METATRACE_UID(__COUNTER__)( \
      ::perfetto::metatrace::TAG, ::perfetto::metatrace::ID)

#define PERFETTO_METATRACE_COUNTER(TAG, ID, VALUE)                \
  ::perfetto::metatrace::TraceCounter(::perfetto::metatrace::TAG, \
                                      ::perfetto::metatrace::ID,  \
                                      static_cast<int32_t>(VALUE))
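// Example usage of the two macros above (a sketch; MY_TAG, MY_EVENT_ID and
// MY_COUNTER_ID are placeholders for constants defined in metatrace_events.h):
//
//   void OnReadTimerFired(size_t queue_size) {
//     PERFETTO_METATRACE_SCOPED(MY_TAG, MY_EVENT_ID);  // Event + duration.
//     PERFETTO_METATRACE_COUNTER(MY_TAG, MY_COUNTER_ID, queue_size);
//     // ... the actual work being traced ...
//   }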

}  // namespace metatrace
}  // namespace perfetto

#endif  // INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_