1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/profiling/perf/event_config.h"
18 
19 #include <linux/perf_event.h>
20 #include <time.h>
21 
22 #include <unwindstack/Regs.h>
23 #include <vector>
24 
25 #include "perfetto/base/flat_set.h"
26 #include "perfetto/ext/base/optional.h"
27 #include "perfetto/ext/base/utils.h"
28 #include "perfetto/profiling/normalize.h"
29 #include "src/profiling/perf/regs_parsing.h"
30 
31 #include "protos/perfetto/common/perf_events.gen.h"
32 #include "protos/perfetto/config/profiling/perf_event_config.gen.h"
33 
34 namespace perfetto {
35 namespace profiling {
36 
namespace {
// Default sampling timebase, used when the config sets neither a period nor a
// frequency.
constexpr uint64_t kDefaultSamplingFrequencyHz = 10;
// Default per-cpu ring buffer size, in 4k pages (must be a power of two).
constexpr uint32_t kDefaultDataPagesPerRingBuffer = 256;  // 1 MB: 256x 4k pages
// Default interval between reads of the per-cpu ring buffers.
constexpr uint32_t kDefaultReadTickPeriodMs = 100;
// Default timeout for the (Android-specific) remote descriptor handshake.
constexpr uint32_t kDefaultRemoteDescriptorTimeoutMs = 100;
42 
Normalize(const std::string & src)43 base::Optional<std::string> Normalize(const std::string& src) {
44   // Construct a null-terminated string that will be mutated by the normalizer.
45   std::vector<char> base(src.size() + 1);
46   memcpy(base.data(), src.data(), src.size());
47   base[src.size()] = '\0';
48 
49   char* new_start = base.data();
50   ssize_t new_sz = NormalizeCmdLine(&new_start, base.size());
51   if (new_sz < 0) {
52     PERFETTO_ELOG("Failed to normalize config cmdline [%s], aborting",
53                   base.data());
54     return base::nullopt;
55   }
56   return base::make_optional<std::string>(new_start,
57                                           static_cast<size_t>(new_sz));
58 }
59 
// Splits a tracepoint description into (group, name).
// Acceptable forms: "sched/sched_switch" or "sched:sched_switch".
// If no separator is present, the group is returned empty and the whole input
// becomes the name (callers treat an empty group as invalid).
std::pair<std::string, std::string> SplitTracepointString(
    const std::string& input) {
  // Single-character find overloads avoid constructing a substring search
  // (clang-tidy: performance-faster-string-find).
  auto slash_pos = input.find('/');
  if (slash_pos != std::string::npos)
    return std::make_pair(input.substr(0, slash_pos),
                          input.substr(slash_pos + 1));

  auto colon_pos = input.find(':');
  if (colon_pos != std::string::npos)
    return std::make_pair(input.substr(0, colon_pos),
                          input.substr(colon_pos + 1));

  return std::make_pair("", input);
}
75 
76 // If set, the returned id is guaranteed to be non-zero.
ParseTracepointAndResolveId(const protos::gen::PerfEvents::Tracepoint & tracepoint,EventConfig::tracepoint_id_fn_t tracepoint_id_lookup)77 base::Optional<uint32_t> ParseTracepointAndResolveId(
78     const protos::gen::PerfEvents::Tracepoint& tracepoint,
79     EventConfig::tracepoint_id_fn_t tracepoint_id_lookup) {
80   std::string full_name = tracepoint.name();
81   std::string tp_group;
82   std::string tp_name;
83   std::tie(tp_group, tp_name) = SplitTracepointString(full_name);
84   if (tp_group.empty() || tp_name.empty()) {
85     PERFETTO_ELOG(
86         "Invalid tracepoint format: %s. Should be a full path like "
87         "sched:sched_switch or sched/sched_switch.",
88         full_name.c_str());
89     return base::nullopt;
90   }
91 
92   uint32_t tracepoint_id = tracepoint_id_lookup(tp_group, tp_name);
93   if (!tracepoint_id) {
94     PERFETTO_ELOG(
95         "Failed to resolve tracepoint %s to its id. Check that tracefs is "
96         "accessible and the event exists.",
97         full_name.c_str());
98     return base::nullopt;
99   }
100   return base::make_optional(tracepoint_id);
101 }
102 
103 // Returns |base::nullopt| if any of the input cmdlines couldn't be normalized.
104 // |T| is either gen::PerfEventConfig or gen::PerfEventConfig::Scope.
105 template <typename T>
ParseTargetFilter(const T & cfg)106 base::Optional<TargetFilter> ParseTargetFilter(const T& cfg) {
107   TargetFilter filter;
108   for (const auto& str : cfg.target_cmdline()) {
109     base::Optional<std::string> opt = Normalize(str);
110     if (!opt.has_value()) {
111       PERFETTO_ELOG("Failure normalizing cmdline: [%s]", str.c_str());
112       return base::nullopt;
113     }
114     filter.cmdlines.insert(std::move(opt.value()));
115   }
116 
117   for (const auto& str : cfg.exclude_cmdline()) {
118     base::Optional<std::string> opt = Normalize(str);
119     if (!opt.has_value()) {
120       PERFETTO_ELOG("Failure normalizing cmdline: [%s]", str.c_str());
121       return base::nullopt;
122     }
123     filter.exclude_cmdlines.insert(std::move(opt.value()));
124   }
125 
126   for (const int32_t pid : cfg.target_pid()) {
127     filter.pids.insert(pid);
128   }
129 
130   for (const int32_t pid : cfg.exclude_pid()) {
131     filter.exclude_pids.insert(pid);
132   }
133 
134   filter.additional_cmdline_count = cfg.additional_cmdline_count();
135 
136   return base::make_optional(std::move(filter));
137 }
138 
// True iff |v| is nonzero and has exactly one bit set.
constexpr bool IsPowerOfTwo(size_t v) {
  return v && !(v & (v - 1));
}
142 
143 // returns |base::nullopt| if the input is invalid.
ChooseActualRingBufferPages(uint32_t config_value)144 base::Optional<uint32_t> ChooseActualRingBufferPages(uint32_t config_value) {
145   if (!config_value) {
146     static_assert(IsPowerOfTwo(kDefaultDataPagesPerRingBuffer), "");
147     return base::make_optional(kDefaultDataPagesPerRingBuffer);
148   }
149 
150   if (!IsPowerOfTwo(config_value)) {
151     PERFETTO_ELOG("kernel buffer size must be a power of two pages");
152     return base::nullopt;
153   }
154 
155   return base::make_optional(config_value);
156 }
157 
ToPerfCounter(protos::gen::PerfEvents::Counter pb_enum)158 base::Optional<PerfCounter> ToPerfCounter(
159     protos::gen::PerfEvents::Counter pb_enum) {
160   using protos::gen::PerfEvents;
161   switch (static_cast<int>(pb_enum)) {  // cast to pacify -Wswitch-enum
162     case PerfEvents::SW_CPU_CLOCK:
163       return PerfCounter::Counter(PerfEvents::SW_CPU_CLOCK, PERF_TYPE_SOFTWARE,
164                                   PERF_COUNT_SW_CPU_CLOCK);
165     case PerfEvents::SW_PAGE_FAULTS:
166       return PerfCounter::Counter(PerfEvents::SW_PAGE_FAULTS,
167                                   PERF_TYPE_SOFTWARE,
168                                   PERF_COUNT_SW_PAGE_FAULTS);
169     case PerfEvents::HW_CPU_CYCLES:
170       return PerfCounter::Counter(PerfEvents::HW_CPU_CYCLES, PERF_TYPE_HARDWARE,
171                                   PERF_COUNT_HW_CPU_CYCLES);
172     case PerfEvents::HW_INSTRUCTIONS:
173       return PerfCounter::Counter(PerfEvents::HW_INSTRUCTIONS,
174                                   PERF_TYPE_HARDWARE,
175                                   PERF_COUNT_HW_INSTRUCTIONS);
176     default:
177       PERFETTO_ELOG("Unrecognised PerfEvents::Counter enum value: %zu",
178                     static_cast<size_t>(pb_enum));
179       return base::nullopt;
180   }
181 }
182 
183 }  // namespace
184 
185 // static
Counter(protos::gen::PerfEvents::Counter counter,uint32_t type,uint32_t config)186 PerfCounter PerfCounter::Counter(protos::gen::PerfEvents::Counter counter,
187                                  uint32_t type,
188                                  uint32_t config) {
189   PerfCounter ret;
190   ret.counter = counter;
191   ret.type = type;
192   ret.config = config;
193   return ret;
194 }
195 
196 // static
Tracepoint(protos::gen::PerfEvents::Tracepoint tracepoint,uint32_t id)197 PerfCounter PerfCounter::Tracepoint(
198     protos::gen::PerfEvents::Tracepoint tracepoint,
199     uint32_t id) {
200   PerfCounter ret;
201   ret.tracepoint = std::move(tracepoint);
202   ret.type = PERF_TYPE_TRACEPOINT;
203   ret.config = id;
204   return ret;
205 }
206 
207 // static
Create(const DataSourceConfig & ds_config,tracepoint_id_fn_t tracepoint_id_lookup)208 base::Optional<EventConfig> EventConfig::Create(
209     const DataSourceConfig& ds_config,
210     tracepoint_id_fn_t tracepoint_id_lookup) {
211   protos::gen::PerfEventConfig pb_config;
212   if (!pb_config.ParseFromString(ds_config.perf_event_config_raw()))
213     return base::nullopt;
214 
215   return EventConfig::Create(pb_config, ds_config, tracepoint_id_lookup);
216 }
217 
218 // static
Create(const protos::gen::PerfEventConfig & pb_config,const DataSourceConfig & raw_ds_config,tracepoint_id_fn_t tracepoint_id_lookup)219 base::Optional<EventConfig> EventConfig::Create(
220     const protos::gen::PerfEventConfig& pb_config,
221     const DataSourceConfig& raw_ds_config,
222     tracepoint_id_fn_t tracepoint_id_lookup) {
223   // Timebase: sampling interval.
224   uint64_t sampling_frequency = 0;
225   uint64_t sampling_period = 0;
226   if (pb_config.timebase().period()) {
227     sampling_period = pb_config.timebase().period();
228   } else if (pb_config.timebase().frequency()) {
229     sampling_frequency = pb_config.timebase().frequency();
230   } else if (pb_config.sampling_frequency()) {  // backwards compatibility
231     sampling_frequency = pb_config.sampling_frequency();
232   } else {
233     sampling_frequency = kDefaultSamplingFrequencyHz;
234   }
235   PERFETTO_DCHECK(sampling_period && !sampling_frequency ||
236                   !sampling_period && sampling_frequency);
237 
238   // Timebase event. Default: CPU timer.
239   PerfCounter timebase_event;
240   if (pb_config.timebase().has_counter()) {
241     auto maybe_counter = ToPerfCounter(pb_config.timebase().counter());
242     if (!maybe_counter)
243       return base::nullopt;
244     timebase_event = *maybe_counter;
245 
246   } else if (pb_config.timebase().has_tracepoint()) {
247     const auto& tracepoint_pb = pb_config.timebase().tracepoint();
248     base::Optional<uint32_t> maybe_id =
249         ParseTracepointAndResolveId(tracepoint_pb, tracepoint_id_lookup);
250     if (!maybe_id)
251       return base::nullopt;
252     timebase_event = PerfCounter::Tracepoint(tracepoint_pb, *maybe_id);
253 
254   } else {
255     timebase_event =
256         PerfCounter::Counter(protos::gen::PerfEvents::PerfEvents::SW_CPU_CLOCK,
257                              PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK);
258   }
259 
260   // Callstack sampling.
261   bool sample_callstacks = false;
262   bool kernel_frames = false;
263   TargetFilter target_filter;
264   bool legacy_config = pb_config.all_cpus();  // all_cpus was mandatory before
265   if (pb_config.has_callstack_sampling() || legacy_config) {
266     sample_callstacks = true;
267 
268     // Process scoping.
269     auto maybe_filter =
270         pb_config.callstack_sampling().has_scope()
271             ? ParseTargetFilter(pb_config.callstack_sampling().scope())
272             : ParseTargetFilter(pb_config);  // backwards compatibility
273     if (!maybe_filter.has_value())
274       return base::nullopt;
275 
276     target_filter = std::move(maybe_filter.value());
277 
278     // Inclusion of kernel callchains.
279     kernel_frames = pb_config.callstack_sampling().kernel_frames() ||
280                     pb_config.kernel_frames();
281   }
282 
283   // Ring buffer options.
284   base::Optional<uint32_t> ring_buffer_pages =
285       ChooseActualRingBufferPages(pb_config.ring_buffer_pages());
286   if (!ring_buffer_pages.has_value())
287     return base::nullopt;
288 
289   uint32_t read_tick_period_ms = pb_config.ring_buffer_read_period_ms()
290                                      ? pb_config.ring_buffer_read_period_ms()
291                                      : kDefaultReadTickPeriodMs;
292 
293   // Calculate a rough upper limit for the amount of samples the producer
294   // should read per read tick, as a safeguard against getting stuck chasing the
295   // ring buffer head indefinitely.
296   uint64_t samples_per_tick_limit = 0;
297   if (sampling_frequency) {
298     // expected = rate * period, with a conversion of period from ms to s:
299     uint64_t expected_samples_per_tick =
300         1 + (sampling_frequency * read_tick_period_ms) / 1000;
301     // Double the the limit to account of actual sample rate uncertainties, as
302     // well as any other factors:
303     samples_per_tick_limit = 2 * expected_samples_per_tick;
304   } else {  // sampling_period
305     // We don't know the sample rate that a fixed period would cause, but we can
306     // still estimate how many samples will fit in one pass of the ring buffer
307     // (with the assumption that we don't want to read more than one buffer's
308     // capacity within a tick).
309     // TODO(rsavitski): for now, make an extremely conservative guess of an 8
310     // byte sample (stack sampling samples can be up to 64KB). This is most
311     // likely as good as no limit in practice.
312     samples_per_tick_limit = *ring_buffer_pages * (base::kPageSize / 8);
313   }
314   PERFETTO_DLOG("Capping samples (not records) per tick to [%" PRIu64 "]",
315                 samples_per_tick_limit);
316   if (samples_per_tick_limit == 0)
317     return base::nullopt;
318 
319   // Optional footprint controls.
320   uint64_t max_enqueued_footprint_bytes =
321       pb_config.max_enqueued_footprint_kb() * 1024;
322 
323   // Android-specific options.
324   uint32_t remote_descriptor_timeout_ms =
325       pb_config.remote_descriptor_timeout_ms()
326           ? pb_config.remote_descriptor_timeout_ms()
327           : kDefaultRemoteDescriptorTimeoutMs;
328 
329   // Build the underlying syscall config struct.
330   perf_event_attr pe = {};
331   pe.size = sizeof(perf_event_attr);
332   pe.disabled = 1;  // will be activated via ioctl
333 
334   // Sampling timebase.
335   pe.type = timebase_event.type;
336   pe.config = timebase_event.config;
337   if (sampling_frequency) {
338     pe.freq = true;
339     pe.sample_freq = sampling_frequency;
340   } else {
341     pe.sample_period = sampling_period;
342   }
343 
344   // What the samples will contain.
345   pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_READ;
346   // PERF_SAMPLE_TIME:
347   // We used to use CLOCK_BOOTTIME, but that is not nmi-safe, and therefore
348   // works only for software events.
349   pe.clockid = CLOCK_MONOTONIC_RAW;
350   pe.use_clockid = true;
351 
352   if (sample_callstacks) {
353     pe.sample_type |= PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER;
354     // PERF_SAMPLE_STACK_USER:
355     // Needs to be < ((u16)(~0u)), and have bottom 8 bits clear.
356     // Note that the kernel still needs to make space for the other parts of the
357     // sample (up to the max record size of 64k), so the effective maximum
358     // can be lower than this.
359     pe.sample_stack_user = (1u << 16) - 256;
360     // PERF_SAMPLE_REGS_USER:
361     pe.sample_regs_user =
362         PerfUserRegsMaskForArch(unwindstack::Regs::CurrentArch());
363 
364     // Optional kernel callchains:
365     if (kernel_frames) {
366       pe.sample_type |= PERF_SAMPLE_CALLCHAIN;
367       pe.exclude_callchain_user = true;
368     }
369   }
370 
371   return EventConfig(
372       raw_ds_config, pe, timebase_event, sample_callstacks,
373       std::move(target_filter), kernel_frames, ring_buffer_pages.value(),
374       read_tick_period_ms, samples_per_tick_limit, remote_descriptor_timeout_ms,
375       pb_config.unwind_state_clear_period_ms(), max_enqueued_footprint_bytes,
376       pb_config.target_installed_by());
377 }
378 
EventConfig(const DataSourceConfig & raw_ds_config,const perf_event_attr & pe,const PerfCounter & timebase_event,bool sample_callstacks,TargetFilter target_filter,bool kernel_frames,uint32_t ring_buffer_pages,uint32_t read_tick_period_ms,uint64_t samples_per_tick_limit,uint32_t remote_descriptor_timeout_ms,uint32_t unwind_state_clear_period_ms,uint64_t max_enqueued_footprint_bytes,std::vector<std::string> target_installed_by)379 EventConfig::EventConfig(const DataSourceConfig& raw_ds_config,
380                          const perf_event_attr& pe,
381                          const PerfCounter& timebase_event,
382                          bool sample_callstacks,
383                          TargetFilter target_filter,
384                          bool kernel_frames,
385                          uint32_t ring_buffer_pages,
386                          uint32_t read_tick_period_ms,
387                          uint64_t samples_per_tick_limit,
388                          uint32_t remote_descriptor_timeout_ms,
389                          uint32_t unwind_state_clear_period_ms,
390                          uint64_t max_enqueued_footprint_bytes,
391                          std::vector<std::string> target_installed_by)
392     : perf_event_attr_(pe),
393       timebase_event_(timebase_event),
394       sample_callstacks_(sample_callstacks),
395       target_filter_(std::move(target_filter)),
396       kernel_frames_(kernel_frames),
397       ring_buffer_pages_(ring_buffer_pages),
398       read_tick_period_ms_(read_tick_period_ms),
399       samples_per_tick_limit_(samples_per_tick_limit),
400       remote_descriptor_timeout_ms_(remote_descriptor_timeout_ms),
401       unwind_state_clear_period_ms_(unwind_state_clear_period_ms),
402       max_enqueued_footprint_bytes_(max_enqueued_footprint_bytes),
403       target_installed_by_(std::move(target_installed_by)),
404       raw_ds_config_(raw_ds_config) /* full copy */ {}
405 
406 }  // namespace profiling
407 }  // namespace perfetto
408