1 /*
2 * Copyright (C) 2020 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/profiling/perf/event_config.h"
18
19 #include <linux/perf_event.h>
20 #include <time.h>
21
22 #include <unwindstack/Regs.h>
23 #include <vector>
24
25 #include "perfetto/base/flat_set.h"
26 #include "perfetto/ext/base/optional.h"
27 #include "perfetto/ext/base/utils.h"
28 #include "perfetto/profiling/normalize.h"
29 #include "src/profiling/perf/regs_parsing.h"
30
31 #include "protos/perfetto/common/perf_events.gen.h"
32 #include "protos/perfetto/config/profiling/perf_event_config.gen.h"
33
34 namespace perfetto {
35 namespace profiling {
36
37 namespace {
// Fallback sampling rate when the config sets neither a timebase period nor a
// frequency.
constexpr uint64_t kDefaultSamplingFrequencyHz = 10;
constexpr uint32_t kDefaultDataPagesPerRingBuffer = 256;  // 1 MB: 256x 4k pages
// Default cadence at which the producer drains the per-cpu ring buffers.
constexpr uint32_t kDefaultReadTickPeriodMs = 100;
// Default wait for a profiled process to hand over its unwinding fds.
constexpr uint32_t kDefaultRemoteDescriptorTimeoutMs = 100;
42
Normalize(const std::string & src)43 base::Optional<std::string> Normalize(const std::string& src) {
44 // Construct a null-terminated string that will be mutated by the normalizer.
45 std::vector<char> base(src.size() + 1);
46 memcpy(base.data(), src.data(), src.size());
47 base[src.size()] = '\0';
48
49 char* new_start = base.data();
50 ssize_t new_sz = NormalizeCmdLine(&new_start, base.size());
51 if (new_sz < 0) {
52 PERFETTO_ELOG("Failed to normalize config cmdline [%s], aborting",
53 base.data());
54 return base::nullopt;
55 }
56 return base::make_optional<std::string>(new_start,
57 static_cast<size_t>(new_sz));
58 }
59
// Splits a tracepoint spec into its (group, name) parts.
// Acceptable forms: "sched/sched_switch" or "sched:sched_switch".
// If no separator is present, the group is returned empty (callers treat an
// empty group as invalid).
std::pair<std::string, std::string> SplitTracepointString(
    const std::string& input) {
  // Single-character find() overload: cheaper than substring search
  // (clang-tidy: performance-faster-string-find).
  auto slash_pos = input.find('/');
  if (slash_pos != std::string::npos)
    return std::make_pair(input.substr(0, slash_pos),
                          input.substr(slash_pos + 1));

  auto colon_pos = input.find(':');
  if (colon_pos != std::string::npos)
    return std::make_pair(input.substr(0, colon_pos),
                          input.substr(colon_pos + 1));

  return std::make_pair("", input);
}
75
76 // If set, the returned id is guaranteed to be non-zero.
ParseTracepointAndResolveId(const protos::gen::PerfEvents::Tracepoint & tracepoint,EventConfig::tracepoint_id_fn_t tracepoint_id_lookup)77 base::Optional<uint32_t> ParseTracepointAndResolveId(
78 const protos::gen::PerfEvents::Tracepoint& tracepoint,
79 EventConfig::tracepoint_id_fn_t tracepoint_id_lookup) {
80 std::string full_name = tracepoint.name();
81 std::string tp_group;
82 std::string tp_name;
83 std::tie(tp_group, tp_name) = SplitTracepointString(full_name);
84 if (tp_group.empty() || tp_name.empty()) {
85 PERFETTO_ELOG(
86 "Invalid tracepoint format: %s. Should be a full path like "
87 "sched:sched_switch or sched/sched_switch.",
88 full_name.c_str());
89 return base::nullopt;
90 }
91
92 uint32_t tracepoint_id = tracepoint_id_lookup(tp_group, tp_name);
93 if (!tracepoint_id) {
94 PERFETTO_ELOG(
95 "Failed to resolve tracepoint %s to its id. Check that tracefs is "
96 "accessible and the event exists.",
97 full_name.c_str());
98 return base::nullopt;
99 }
100 return base::make_optional(tracepoint_id);
101 }
102
103 // Returns |base::nullopt| if any of the input cmdlines couldn't be normalized.
104 // |T| is either gen::PerfEventConfig or gen::PerfEventConfig::Scope.
105 template <typename T>
ParseTargetFilter(const T & cfg)106 base::Optional<TargetFilter> ParseTargetFilter(const T& cfg) {
107 TargetFilter filter;
108 for (const auto& str : cfg.target_cmdline()) {
109 base::Optional<std::string> opt = Normalize(str);
110 if (!opt.has_value()) {
111 PERFETTO_ELOG("Failure normalizing cmdline: [%s]", str.c_str());
112 return base::nullopt;
113 }
114 filter.cmdlines.insert(std::move(opt.value()));
115 }
116
117 for (const auto& str : cfg.exclude_cmdline()) {
118 base::Optional<std::string> opt = Normalize(str);
119 if (!opt.has_value()) {
120 PERFETTO_ELOG("Failure normalizing cmdline: [%s]", str.c_str());
121 return base::nullopt;
122 }
123 filter.exclude_cmdlines.insert(std::move(opt.value()));
124 }
125
126 for (const int32_t pid : cfg.target_pid()) {
127 filter.pids.insert(pid);
128 }
129
130 for (const int32_t pid : cfg.exclude_pid()) {
131 filter.exclude_pids.insert(pid);
132 }
133
134 filter.additional_cmdline_count = cfg.additional_cmdline_count();
135
136 return base::make_optional(std::move(filter));
137 }
138
// True iff |v| is non-zero and has exactly one bit set: clearing the lowest
// set bit (v & (v - 1)) must leave nothing behind.
constexpr bool IsPowerOfTwo(size_t v) {
  return v != 0 && (v & (v - 1)) == 0;
}
142
143 // returns |base::nullopt| if the input is invalid.
ChooseActualRingBufferPages(uint32_t config_value)144 base::Optional<uint32_t> ChooseActualRingBufferPages(uint32_t config_value) {
145 if (!config_value) {
146 static_assert(IsPowerOfTwo(kDefaultDataPagesPerRingBuffer), "");
147 return base::make_optional(kDefaultDataPagesPerRingBuffer);
148 }
149
150 if (!IsPowerOfTwo(config_value)) {
151 PERFETTO_ELOG("kernel buffer size must be a power of two pages");
152 return base::nullopt;
153 }
154
155 return base::make_optional(config_value);
156 }
157
ToPerfCounter(protos::gen::PerfEvents::Counter pb_enum)158 base::Optional<PerfCounter> ToPerfCounter(
159 protos::gen::PerfEvents::Counter pb_enum) {
160 using protos::gen::PerfEvents;
161 switch (static_cast<int>(pb_enum)) { // cast to pacify -Wswitch-enum
162 case PerfEvents::SW_CPU_CLOCK:
163 return PerfCounter::Counter(PerfEvents::SW_CPU_CLOCK, PERF_TYPE_SOFTWARE,
164 PERF_COUNT_SW_CPU_CLOCK);
165 case PerfEvents::SW_PAGE_FAULTS:
166 return PerfCounter::Counter(PerfEvents::SW_PAGE_FAULTS,
167 PERF_TYPE_SOFTWARE,
168 PERF_COUNT_SW_PAGE_FAULTS);
169 case PerfEvents::HW_CPU_CYCLES:
170 return PerfCounter::Counter(PerfEvents::HW_CPU_CYCLES, PERF_TYPE_HARDWARE,
171 PERF_COUNT_HW_CPU_CYCLES);
172 case PerfEvents::HW_INSTRUCTIONS:
173 return PerfCounter::Counter(PerfEvents::HW_INSTRUCTIONS,
174 PERF_TYPE_HARDWARE,
175 PERF_COUNT_HW_INSTRUCTIONS);
176 default:
177 PERFETTO_ELOG("Unrecognised PerfEvents::Counter enum value: %zu",
178 static_cast<size_t>(pb_enum));
179 return base::nullopt;
180 }
181 }
182
183 } // namespace
184
185 // static
Counter(protos::gen::PerfEvents::Counter counter,uint32_t type,uint32_t config)186 PerfCounter PerfCounter::Counter(protos::gen::PerfEvents::Counter counter,
187 uint32_t type,
188 uint32_t config) {
189 PerfCounter ret;
190 ret.counter = counter;
191 ret.type = type;
192 ret.config = config;
193 return ret;
194 }
195
196 // static
Tracepoint(protos::gen::PerfEvents::Tracepoint tracepoint,uint32_t id)197 PerfCounter PerfCounter::Tracepoint(
198 protos::gen::PerfEvents::Tracepoint tracepoint,
199 uint32_t id) {
200 PerfCounter ret;
201 ret.tracepoint = std::move(tracepoint);
202 ret.type = PERF_TYPE_TRACEPOINT;
203 ret.config = id;
204 return ret;
205 }
206
207 // static
Create(const DataSourceConfig & ds_config,tracepoint_id_fn_t tracepoint_id_lookup)208 base::Optional<EventConfig> EventConfig::Create(
209 const DataSourceConfig& ds_config,
210 tracepoint_id_fn_t tracepoint_id_lookup) {
211 protos::gen::PerfEventConfig pb_config;
212 if (!pb_config.ParseFromString(ds_config.perf_event_config_raw()))
213 return base::nullopt;
214
215 return EventConfig::Create(pb_config, ds_config, tracepoint_id_lookup);
216 }
217
218 // static
Create(const protos::gen::PerfEventConfig & pb_config,const DataSourceConfig & raw_ds_config,tracepoint_id_fn_t tracepoint_id_lookup)219 base::Optional<EventConfig> EventConfig::Create(
220 const protos::gen::PerfEventConfig& pb_config,
221 const DataSourceConfig& raw_ds_config,
222 tracepoint_id_fn_t tracepoint_id_lookup) {
223 // Timebase: sampling interval.
224 uint64_t sampling_frequency = 0;
225 uint64_t sampling_period = 0;
226 if (pb_config.timebase().period()) {
227 sampling_period = pb_config.timebase().period();
228 } else if (pb_config.timebase().frequency()) {
229 sampling_frequency = pb_config.timebase().frequency();
230 } else if (pb_config.sampling_frequency()) { // backwards compatibility
231 sampling_frequency = pb_config.sampling_frequency();
232 } else {
233 sampling_frequency = kDefaultSamplingFrequencyHz;
234 }
235 PERFETTO_DCHECK(sampling_period && !sampling_frequency ||
236 !sampling_period && sampling_frequency);
237
238 // Timebase event. Default: CPU timer.
239 PerfCounter timebase_event;
240 if (pb_config.timebase().has_counter()) {
241 auto maybe_counter = ToPerfCounter(pb_config.timebase().counter());
242 if (!maybe_counter)
243 return base::nullopt;
244 timebase_event = *maybe_counter;
245
246 } else if (pb_config.timebase().has_tracepoint()) {
247 const auto& tracepoint_pb = pb_config.timebase().tracepoint();
248 base::Optional<uint32_t> maybe_id =
249 ParseTracepointAndResolveId(tracepoint_pb, tracepoint_id_lookup);
250 if (!maybe_id)
251 return base::nullopt;
252 timebase_event = PerfCounter::Tracepoint(tracepoint_pb, *maybe_id);
253
254 } else {
255 timebase_event =
256 PerfCounter::Counter(protos::gen::PerfEvents::PerfEvents::SW_CPU_CLOCK,
257 PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK);
258 }
259
260 // Callstack sampling.
261 bool sample_callstacks = false;
262 bool kernel_frames = false;
263 TargetFilter target_filter;
264 bool legacy_config = pb_config.all_cpus(); // all_cpus was mandatory before
265 if (pb_config.has_callstack_sampling() || legacy_config) {
266 sample_callstacks = true;
267
268 // Process scoping.
269 auto maybe_filter =
270 pb_config.callstack_sampling().has_scope()
271 ? ParseTargetFilter(pb_config.callstack_sampling().scope())
272 : ParseTargetFilter(pb_config); // backwards compatibility
273 if (!maybe_filter.has_value())
274 return base::nullopt;
275
276 target_filter = std::move(maybe_filter.value());
277
278 // Inclusion of kernel callchains.
279 kernel_frames = pb_config.callstack_sampling().kernel_frames() ||
280 pb_config.kernel_frames();
281 }
282
283 // Ring buffer options.
284 base::Optional<uint32_t> ring_buffer_pages =
285 ChooseActualRingBufferPages(pb_config.ring_buffer_pages());
286 if (!ring_buffer_pages.has_value())
287 return base::nullopt;
288
289 uint32_t read_tick_period_ms = pb_config.ring_buffer_read_period_ms()
290 ? pb_config.ring_buffer_read_period_ms()
291 : kDefaultReadTickPeriodMs;
292
293 // Calculate a rough upper limit for the amount of samples the producer
294 // should read per read tick, as a safeguard against getting stuck chasing the
295 // ring buffer head indefinitely.
296 uint64_t samples_per_tick_limit = 0;
297 if (sampling_frequency) {
298 // expected = rate * period, with a conversion of period from ms to s:
299 uint64_t expected_samples_per_tick =
300 1 + (sampling_frequency * read_tick_period_ms) / 1000;
301 // Double the the limit to account of actual sample rate uncertainties, as
302 // well as any other factors:
303 samples_per_tick_limit = 2 * expected_samples_per_tick;
304 } else { // sampling_period
305 // We don't know the sample rate that a fixed period would cause, but we can
306 // still estimate how many samples will fit in one pass of the ring buffer
307 // (with the assumption that we don't want to read more than one buffer's
308 // capacity within a tick).
309 // TODO(rsavitski): for now, make an extremely conservative guess of an 8
310 // byte sample (stack sampling samples can be up to 64KB). This is most
311 // likely as good as no limit in practice.
312 samples_per_tick_limit = *ring_buffer_pages * (base::kPageSize / 8);
313 }
314 PERFETTO_DLOG("Capping samples (not records) per tick to [%" PRIu64 "]",
315 samples_per_tick_limit);
316 if (samples_per_tick_limit == 0)
317 return base::nullopt;
318
319 // Optional footprint controls.
320 uint64_t max_enqueued_footprint_bytes =
321 pb_config.max_enqueued_footprint_kb() * 1024;
322
323 // Android-specific options.
324 uint32_t remote_descriptor_timeout_ms =
325 pb_config.remote_descriptor_timeout_ms()
326 ? pb_config.remote_descriptor_timeout_ms()
327 : kDefaultRemoteDescriptorTimeoutMs;
328
329 // Build the underlying syscall config struct.
330 perf_event_attr pe = {};
331 pe.size = sizeof(perf_event_attr);
332 pe.disabled = 1; // will be activated via ioctl
333
334 // Sampling timebase.
335 pe.type = timebase_event.type;
336 pe.config = timebase_event.config;
337 if (sampling_frequency) {
338 pe.freq = true;
339 pe.sample_freq = sampling_frequency;
340 } else {
341 pe.sample_period = sampling_period;
342 }
343
344 // What the samples will contain.
345 pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_READ;
346 // PERF_SAMPLE_TIME:
347 // We used to use CLOCK_BOOTTIME, but that is not nmi-safe, and therefore
348 // works only for software events.
349 pe.clockid = CLOCK_MONOTONIC_RAW;
350 pe.use_clockid = true;
351
352 if (sample_callstacks) {
353 pe.sample_type |= PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER;
354 // PERF_SAMPLE_STACK_USER:
355 // Needs to be < ((u16)(~0u)), and have bottom 8 bits clear.
356 // Note that the kernel still needs to make space for the other parts of the
357 // sample (up to the max record size of 64k), so the effective maximum
358 // can be lower than this.
359 pe.sample_stack_user = (1u << 16) - 256;
360 // PERF_SAMPLE_REGS_USER:
361 pe.sample_regs_user =
362 PerfUserRegsMaskForArch(unwindstack::Regs::CurrentArch());
363
364 // Optional kernel callchains:
365 if (kernel_frames) {
366 pe.sample_type |= PERF_SAMPLE_CALLCHAIN;
367 pe.exclude_callchain_user = true;
368 }
369 }
370
371 return EventConfig(
372 raw_ds_config, pe, timebase_event, sample_callstacks,
373 std::move(target_filter), kernel_frames, ring_buffer_pages.value(),
374 read_tick_period_ms, samples_per_tick_limit, remote_descriptor_timeout_ms,
375 pb_config.unwind_state_clear_period_ms(), max_enqueued_footprint_bytes,
376 pb_config.target_installed_by());
377 }
378
// Trivial member-wise constructor. All validation and derivation of these
// values happens in EventConfig::Create(); by this point they are final.
EventConfig::EventConfig(const DataSourceConfig& raw_ds_config,
                         const perf_event_attr& pe,
                         const PerfCounter& timebase_event,
                         bool sample_callstacks,
                         TargetFilter target_filter,
                         bool kernel_frames,
                         uint32_t ring_buffer_pages,
                         uint32_t read_tick_period_ms,
                         uint64_t samples_per_tick_limit,
                         uint32_t remote_descriptor_timeout_ms,
                         uint32_t unwind_state_clear_period_ms,
                         uint64_t max_enqueued_footprint_bytes,
                         std::vector<std::string> target_installed_by)
    : perf_event_attr_(pe),
      timebase_event_(timebase_event),
      sample_callstacks_(sample_callstacks),
      target_filter_(std::move(target_filter)),
      kernel_frames_(kernel_frames),
      ring_buffer_pages_(ring_buffer_pages),
      read_tick_period_ms_(read_tick_period_ms),
      samples_per_tick_limit_(samples_per_tick_limit),
      remote_descriptor_timeout_ms_(remote_descriptor_timeout_ms),
      unwind_state_clear_period_ms_(unwind_state_clear_period_ms),
      max_enqueued_footprint_bytes_(max_enqueued_footprint_bytes),
      target_installed_by_(std::move(target_installed_by)),
      raw_ds_config_(raw_ds_config) /* full copy */ {}
405
406 } // namespace profiling
407 } // namespace perfetto
408