1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/traced/probes/ftrace/ftrace_controller.h"
18 
19 #include <fcntl.h>
20 #include <stdint.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <unistd.h>
26 
27 #include <array>
28 #include <string>
29 #include <utility>
30 
31 #include "perfetto/base/build_config.h"
32 #include "perfetto/base/logging.h"
33 #include "perfetto/base/time.h"
34 #include "perfetto/ext/base/file_utils.h"
35 #include "perfetto/ext/base/metatrace.h"
36 #include "perfetto/ext/tracing/core/trace_writer.h"
37 #include "src/kallsyms/kernel_symbol_map.h"
38 #include "src/kallsyms/lazy_kernel_symbolizer.h"
39 #include "src/traced/probes/ftrace/atrace_hal_wrapper.h"
40 #include "src/traced/probes/ftrace/cpu_reader.h"
41 #include "src/traced/probes/ftrace/cpu_stats_parser.h"
42 #include "src/traced/probes/ftrace/discover_vendor_tracepoints.h"
43 #include "src/traced/probes/ftrace/event_info.h"
44 #include "src/traced/probes/ftrace/ftrace_config_muxer.h"
45 #include "src/traced/probes/ftrace/ftrace_data_source.h"
46 #include "src/traced/probes/ftrace/ftrace_metadata.h"
47 #include "src/traced/probes/ftrace/ftrace_procfs.h"
48 #include "src/traced/probes/ftrace/ftrace_stats.h"
49 #include "src/traced/probes/ftrace/proto_translation_table.h"
50 
51 namespace perfetto {
52 namespace {
53 
54 constexpr int kDefaultDrainPeriodMs = 100;
55 constexpr int kMinDrainPeriodMs = 1;
56 constexpr int kMaxDrainPeriodMs = 1000 * 60;
57 
58 // Read at most this many pages of data per cpu per read task. If we hit this
59 // limit on at least one cpu, we stop and repost the read task, letting other
60 // tasks get some cpu time before continuing reading.
61 constexpr size_t kMaxPagesPerCpuPerReadTick = 256;  // 1 MB per cpu
62 
63 // When reading and parsing data for a particular cpu, we do it in batches of
64 // this many pages. In other words, we'll read up to
65 // |kParsingBufferSizePages| into memory, parse them, and then repeat if we
66 // still haven't caught up to the writer. A working set of 32 pages is 128k of
67 // data, which should fit in a typical L2D cache. Furthermore, the batching
68 // limits the memory usage of traced_probes.
69 //
70 // TODO(rsavitski): consider making buffering & parsing page counts independent,
71 // should be a single counter in the cpu_reader, similar to lost_events case.
72 constexpr size_t kParsingBufferSizePages = 32;
73 
ClampDrainPeriodMs(uint32_t drain_period_ms)74 uint32_t ClampDrainPeriodMs(uint32_t drain_period_ms) {
75   if (drain_period_ms == 0) {
76     return kDefaultDrainPeriodMs;
77   }
78   if (drain_period_ms < kMinDrainPeriodMs ||
79       kMaxDrainPeriodMs < drain_period_ms) {
80     PERFETTO_LOG("drain_period_ms was %u should be between %u and %u",
81                  drain_period_ms, kMinDrainPeriodMs, kMaxDrainPeriodMs);
82     return kDefaultDrainPeriodMs;
83   }
84   return drain_period_ms;
85 }
86 
WriteToFile(const char * path,const char * str)87 void WriteToFile(const char* path, const char* str) {
88   auto fd = base::OpenFile(path, O_WRONLY);
89   if (!fd)
90     return;
91   base::ignore_result(base::WriteAll(*fd, str, strlen(str)));
92 }
93 
ClearFile(const char * path)94 void ClearFile(const char* path) {
95   auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
96 }
97 
98 }  // namespace
99 
100 // Method of last resort to reset ftrace state.
101 // We don't know what state the rest of the system and process is so as far
102 // as possible avoid allocations.
HardResetFtraceState()103 void HardResetFtraceState() {
104   PERFETTO_LOG("Hard resetting ftrace state.");
105 
106   WriteToFile("/sys/kernel/debug/tracing/tracing_on", "0");
107   WriteToFile("/sys/kernel/debug/tracing/buffer_size_kb", "4");
108   WriteToFile("/sys/kernel/debug/tracing/events/enable", "0");
109   ClearFile("/sys/kernel/debug/tracing/trace");
110 
111   WriteToFile("/sys/kernel/tracing/tracing_on", "0");
112   WriteToFile("/sys/kernel/tracing/buffer_size_kb", "4");
113   WriteToFile("/sys/kernel/tracing/events/enable", "0");
114   ClearFile("/sys/kernel/tracing/trace");
115 }
116 
117 // static
Create(base::TaskRunner * runner,Observer * observer)118 std::unique_ptr<FtraceController> FtraceController::Create(
119     base::TaskRunner* runner,
120     Observer* observer) {
121   std::unique_ptr<FtraceProcfs> ftrace_procfs =
122       FtraceProcfs::CreateGuessingMountPoint();
123 
124   if (!ftrace_procfs)
125     return nullptr;
126 
127   auto table = ProtoTranslationTable::Create(
128       ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
129 
130   if (!table)
131     return nullptr;
132 
133   AtraceHalWrapper hal;
134   auto vendor_evts =
135       vendor_tracepoints::DiscoverVendorTracepoints(&hal, ftrace_procfs.get());
136 
137   std::unique_ptr<FtraceConfigMuxer> model = std::unique_ptr<FtraceConfigMuxer>(
138       new FtraceConfigMuxer(ftrace_procfs.get(), table.get(), vendor_evts));
139   return std::unique_ptr<FtraceController>(
140       new FtraceController(std::move(ftrace_procfs), std::move(table),
141                            std::move(model), runner, observer));
142 }
143 
FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,std::unique_ptr<ProtoTranslationTable> table,std::unique_ptr<FtraceConfigMuxer> model,base::TaskRunner * task_runner,Observer * observer)144 FtraceController::FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,
145                                    std::unique_ptr<ProtoTranslationTable> table,
146                                    std::unique_ptr<FtraceConfigMuxer> model,
147                                    base::TaskRunner* task_runner,
148                                    Observer* observer)
149     : task_runner_(task_runner),
150       observer_(observer),
151       symbolizer_(new LazyKernelSymbolizer()),
152       ftrace_procfs_(std::move(ftrace_procfs)),
153       table_(std::move(table)),
154       ftrace_config_muxer_(std::move(model)),
155       weak_factory_(this) {}
156 
~FtraceController()157 FtraceController::~FtraceController() {
158   for (const auto* data_source : data_sources_)
159     ftrace_config_muxer_->RemoveConfig(data_source->config_id());
160   data_sources_.clear();
161   started_data_sources_.clear();
162   StopIfNeeded();
163 }
164 
NowMs() const165 uint64_t FtraceController::NowMs() const {
166   return static_cast<uint64_t>(base::GetWallTimeMs().count());
167 }
168 
StartIfNeeded()169 void FtraceController::StartIfNeeded() {
170   if (started_data_sources_.size() > 1)
171     return;
172   PERFETTO_DCHECK(!started_data_sources_.empty());
173   PERFETTO_DCHECK(per_cpu_.empty());
174 
175   // Lazily allocate the memory used for reading & parsing ftrace.
176   if (!parsing_mem_.IsValid()) {
177     parsing_mem_ =
178         base::PagedMemory::Allocate(base::kPageSize * kParsingBufferSizePages);
179   }
180 
181   per_cpu_.clear();
182   per_cpu_.reserve(ftrace_procfs_->NumberOfCpus());
183   size_t period_page_quota = ftrace_config_muxer_->GetPerCpuBufferSizePages();
184   for (size_t cpu = 0; cpu < ftrace_procfs_->NumberOfCpus(); cpu++) {
185     auto reader = std::unique_ptr<CpuReader>(
186         new CpuReader(cpu, table_.get(), symbolizer_.get(),
187                       ftrace_procfs_->OpenPipeForCpu(cpu)));
188     per_cpu_.emplace_back(std::move(reader), period_page_quota);
189   }
190 
191   // Start the repeating read tasks.
192   auto generation = ++generation_;
193   auto drain_period_ms = GetDrainPeriodMs();
194   auto weak_this = weak_factory_.GetWeakPtr();
195   task_runner_->PostDelayedTask(
196       [weak_this, generation] {
197         if (weak_this)
198           weak_this->ReadTick(generation);
199       },
200       drain_period_ms - (NowMs() % drain_period_ms));
201 }
202 
203 // We handle the ftrace buffers in a repeating task (ReadTick). On a given tick,
204 // we iterate over all per-cpu buffers, parse their contents, and then write out
205 // the serialized packets. This is handled by |CpuReader| instances, which
206 // attempt to read from their respective per-cpu buffer fd until they catch up
207 // to the head of the buffer, or hit a transient error.
208 //
209 // The readers work in batches of |kParsingBufferSizePages| pages for cache
210 // locality, and to limit memory usage.
211 //
212 // However, the reading happens on the primary thread, shared with the rest of
213 // the service (including ipc). If there is a lot of ftrace data to read, we
214 // want to yield to the event loop, re-enqueueing a continuation task at the end
215 // of the immediate queue (letting other enqueued tasks to run before
216 // continuing). Therefore we introduce |kMaxPagesPerCpuPerReadTick|.
217 //
218 // There is also a possibility that the ftrace bandwidth is particularly high.
219 // We do not want to continue trying to catch up to the event stream (via
220 // continuation tasks) without bound, as we want to limit our cpu% usage.  We
221 // assume that given a config saying "per-cpu kernel ftrace buffer is N pages,
222 // and drain every T milliseconds", we should not read more than N pages per
223 // drain period. Therefore we introduce |per_cpu_.period_page_quota|. If the
224 // consumer wants to handle a high bandwidth of ftrace events, they should set
225 // the config values appropriately.
ReadTick(int generation)226 void FtraceController::ReadTick(int generation) {
227   metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
228                              metatrace::FTRACE_READ_TICK);
229   if (started_data_sources_.empty() || generation != generation_) {
230     return;
231   }
232 
233 #if PERFETTO_DCHECK_IS_ON()
234   // The OnFtraceDataWrittenIntoDataSourceBuffers() below is supposed to clear
235   // all metadata, including the |kernel_addrs| map for symbolization.
236   for (FtraceDataSource* ds : started_data_sources_) {
237     FtraceMetadata* ftrace_metadata = ds->mutable_metadata();
238     PERFETTO_DCHECK(ftrace_metadata->kernel_addrs.empty());
239     PERFETTO_DCHECK(ftrace_metadata->last_kernel_addr_index_written == 0);
240   }
241 #endif
242 
243   // Read all cpu buffers with remaining per-period quota.
244   bool all_cpus_done = true;
245   uint8_t* parsing_buf = reinterpret_cast<uint8_t*>(parsing_mem_.Get());
246   for (size_t i = 0; i < per_cpu_.size(); i++) {
247     size_t orig_quota = per_cpu_[i].period_page_quota;
248     if (orig_quota == 0)
249       continue;
250 
251     size_t max_pages = std::min(orig_quota, kMaxPagesPerCpuPerReadTick);
252     size_t pages_read = per_cpu_[i].reader->ReadCycle(
253         parsing_buf, kParsingBufferSizePages, max_pages, started_data_sources_);
254 
255     size_t new_quota = (pages_read >= orig_quota) ? 0 : orig_quota - pages_read;
256     per_cpu_[i].period_page_quota = new_quota;
257 
258     // Reader got stopped by the cap on the number of pages (to not do too much
259     // work on the shared thread at once), but can read more in this drain
260     // period. Repost the ReadTick (on the immediate queue) to iterate over all
261     // cpus again. In other words, we will keep reposting work for all cpus as
262     // long as at least one of them hits the read page cap each tick. If all
263     // readers catch up to the event stream (pages_read < max_pages), or exceed
264     // their quota, we will stop for the given period.
265     PERFETTO_DCHECK(pages_read <= max_pages);
266     if (pages_read == max_pages && new_quota > 0)
267       all_cpus_done = false;
268   }
269   observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
270 
271   // More work to do in this period.
272   auto weak_this = weak_factory_.GetWeakPtr();
273   if (!all_cpus_done) {
274     PERFETTO_DLOG("Reposting immediate ReadTick as there's more work.");
275     task_runner_->PostTask([weak_this, generation] {
276       if (weak_this)
277         weak_this->ReadTick(generation);
278     });
279   } else {
280     // Done until next drain period.
281     size_t period_page_quota = ftrace_config_muxer_->GetPerCpuBufferSizePages();
282     for (auto& per_cpu : per_cpu_)
283       per_cpu.period_page_quota = period_page_quota;
284 
285     auto drain_period_ms = GetDrainPeriodMs();
286     task_runner_->PostDelayedTask(
287         [weak_this, generation] {
288           if (weak_this)
289             weak_this->ReadTick(generation);
290         },
291         drain_period_ms - (NowMs() % drain_period_ms));
292   }
293 }
294 
GetDrainPeriodMs()295 uint32_t FtraceController::GetDrainPeriodMs() {
296   if (data_sources_.empty())
297     return kDefaultDrainPeriodMs;
298   uint32_t min_drain_period_ms = kMaxDrainPeriodMs + 1;
299   for (const FtraceDataSource* data_source : data_sources_) {
300     if (data_source->config().drain_period_ms() < min_drain_period_ms)
301       min_drain_period_ms = data_source->config().drain_period_ms();
302   }
303   return ClampDrainPeriodMs(min_drain_period_ms);
304 }
305 
ClearTrace()306 void FtraceController::ClearTrace() {
307   ftrace_procfs_->ClearTrace();
308 }
309 
DisableAllEvents()310 void FtraceController::DisableAllEvents() {
311   ftrace_procfs_->DisableAllEvents();
312 }
313 
WriteTraceMarker(const std::string & s)314 void FtraceController::WriteTraceMarker(const std::string& s) {
315   ftrace_procfs_->WriteTraceMarker(s);
316 }
317 
Flush(FlushRequestID flush_id)318 void FtraceController::Flush(FlushRequestID flush_id) {
319   metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
320                              metatrace::FTRACE_CPU_FLUSH);
321 
322   // Read all cpus in one go, limiting the per-cpu read amount to make sure we
323   // don't get stuck chasing the writer if there's a very high bandwidth of
324   // events.
325   size_t per_cpu_buf_size_pages =
326       ftrace_config_muxer_->GetPerCpuBufferSizePages();
327   uint8_t* parsing_buf = reinterpret_cast<uint8_t*>(parsing_mem_.Get());
328   for (size_t i = 0; i < per_cpu_.size(); i++) {
329     per_cpu_[i].reader->ReadCycle(parsing_buf, kParsingBufferSizePages,
330                                   per_cpu_buf_size_pages,
331                                   started_data_sources_);
332   }
333   observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
334 
335   for (FtraceDataSource* data_source : started_data_sources_)
336     data_source->OnFtraceFlushComplete(flush_id);
337 }
338 
StopIfNeeded()339 void FtraceController::StopIfNeeded() {
340   if (!started_data_sources_.empty())
341     return;
342 
343   // We are not implicitly flushing on Stop. The tracing service is supposed to
344   // ask for an explicit flush before stopping, unless it needs to perform a
345   // non-graceful stop.
346 
347   per_cpu_.clear();
348   symbolizer_->Destroy();
349 
350   if (parsing_mem_.IsValid()) {
351     parsing_mem_.AdviseDontNeed(parsing_mem_.Get(), parsing_mem_.size());
352   }
353 }
354 
AddDataSource(FtraceDataSource * data_source)355 bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
356   if (!ValidConfig(data_source->config()))
357     return false;
358 
359   auto config_id = ftrace_config_muxer_->SetupConfig(data_source->config());
360   if (!config_id)
361     return false;
362 
363   const FtraceDataSourceConfig* ds_config =
364       ftrace_config_muxer_->GetDataSourceConfig(config_id);
365   auto it_and_inserted = data_sources_.insert(data_source);
366   PERFETTO_DCHECK(it_and_inserted.second);
367   data_source->Initialize(config_id, ds_config);
368   return true;
369 }
370 
StartDataSource(FtraceDataSource * data_source)371 bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
372   PERFETTO_DCHECK(data_sources_.count(data_source) > 0);
373 
374   FtraceConfigId config_id = data_source->config_id();
375   PERFETTO_CHECK(config_id);
376 
377   if (!ftrace_config_muxer_->ActivateConfig(config_id))
378     return false;
379 
380   started_data_sources_.insert(data_source);
381   StartIfNeeded();
382 
383   // If the config is requesting to symbolize kernel addresses, create the
384   // symbolizer and parse /proc/kallsyms (it will take 200-300 ms). This is not
385   // strictly required here but is to avoid hitting the parsing cost while
386   // processing the first ftrace event batch in CpuReader.
387   if (data_source->config().symbolize_ksyms()) {
388     if (data_source->config().initialize_ksyms_synchronously_for_testing()) {
389       symbolizer_->GetOrCreateKernelSymbolMap();
390     } else {
391       auto weak_this = weak_factory_.GetWeakPtr();
392       task_runner_->PostTask([weak_this] {
393         if (weak_this)
394           weak_this->symbolizer_->GetOrCreateKernelSymbolMap();
395       });
396     }
397   }
398 
399   return true;
400 }
401 
RemoveDataSource(FtraceDataSource * data_source)402 void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
403   started_data_sources_.erase(data_source);
404   size_t removed = data_sources_.erase(data_source);
405   if (!removed)
406     return;  // Can happen if AddDataSource failed (e.g. too many sessions).
407   ftrace_config_muxer_->RemoveConfig(data_source->config_id());
408   StopIfNeeded();
409 }
410 
DumpFtraceStats(FtraceStats * stats)411 void FtraceController::DumpFtraceStats(FtraceStats* stats) {
412   DumpAllCpuStats(ftrace_procfs_.get(), stats);
413   if (symbolizer_ && symbolizer_->is_valid()) {
414     auto* symbol_map = symbolizer_->GetOrCreateKernelSymbolMap();
415     stats->kernel_symbols_parsed =
416         static_cast<uint32_t>(symbol_map->num_syms());
417     stats->kernel_symbols_mem_kb =
418         static_cast<uint32_t>(symbol_map->size_bytes() / 1024);
419   }
420 }
421 
422 FtraceController::Observer::~Observer() = default;
423 
424 }  // namespace perfetto
425