1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/traced/probes/ftrace/ftrace_controller.h"
18 
19 #include <fcntl.h>
20 #include <stdint.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <unistd.h>
26 
27 #include <array>
28 #include <string>
29 #include <utility>
30 
31 #include "perfetto/base/build_config.h"
32 #include "perfetto/base/file_utils.h"
33 #include "perfetto/base/logging.h"
34 #include "perfetto/base/metatrace.h"
35 #include "perfetto/base/time.h"
36 #include "perfetto/tracing/core/trace_writer.h"
37 #include "src/traced/probes/ftrace/cpu_reader.h"
38 #include "src/traced/probes/ftrace/cpu_stats_parser.h"
39 #include "src/traced/probes/ftrace/event_info.h"
40 #include "src/traced/probes/ftrace/ftrace_config_muxer.h"
41 #include "src/traced/probes/ftrace/ftrace_data_source.h"
42 #include "src/traced/probes/ftrace/ftrace_metadata.h"
43 #include "src/traced/probes/ftrace/ftrace_procfs.h"
44 #include "src/traced/probes/ftrace/ftrace_stats.h"
45 #include "src/traced/probes/ftrace/proto_translation_table.h"
46 
47 namespace perfetto {
48 namespace {
49 
50 constexpr int kDefaultDrainPeriodMs = 100;
51 constexpr int kControllerFlushTimeoutMs = 500;
52 constexpr int kMinDrainPeriodMs = 1;
53 constexpr int kMaxDrainPeriodMs = 1000 * 60;
54 
ClampDrainPeriodMs(uint32_t drain_period_ms)55 uint32_t ClampDrainPeriodMs(uint32_t drain_period_ms) {
56   if (drain_period_ms == 0) {
57     return kDefaultDrainPeriodMs;
58   }
59   if (drain_period_ms < kMinDrainPeriodMs ||
60       kMaxDrainPeriodMs < drain_period_ms) {
61     PERFETTO_LOG("drain_period_ms was %u should be between %u and %u",
62                  drain_period_ms, kMinDrainPeriodMs, kMaxDrainPeriodMs);
63     return kDefaultDrainPeriodMs;
64   }
65   return drain_period_ms;
66 }
67 
WriteToFile(const char * path,const char * str)68 void WriteToFile(const char* path, const char* str) {
69   auto fd = base::OpenFile(path, O_WRONLY);
70   if (!fd)
71     return;
72   base::ignore_result(base::WriteAll(*fd, str, strlen(str)));
73 }
74 
ClearFile(const char * path)75 void ClearFile(const char* path) {
76   auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
77 }
78 
79 }  // namespace
80 
81 const char* const FtraceController::kTracingPaths[] = {
82 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
83     "/sys/kernel/tracing/", "/sys/kernel/debug/tracing/", nullptr,
84 #else
85     "/sys/kernel/debug/tracing/", nullptr,
86 #endif
87 };
88 
89 // Method of last resort to reset ftrace state.
90 // We don't know what state the rest of the system and process is so as far
91 // as possible avoid allocations.
HardResetFtraceState()92 void HardResetFtraceState() {
93   WriteToFile("/sys/kernel/debug/tracing/tracing_on", "0");
94   WriteToFile("/sys/kernel/debug/tracing/buffer_size_kb", "4");
95   WriteToFile("/sys/kernel/debug/tracing/events/enable", "0");
96   ClearFile("/sys/kernel/debug/tracing/trace");
97 
98   WriteToFile("/sys/kernel/tracing/tracing_on", "0");
99   WriteToFile("/sys/kernel/tracing/buffer_size_kb", "4");
100   WriteToFile("/sys/kernel/tracing/events/enable", "0");
101   ClearFile("/sys/kernel/tracing/trace");
102 }
103 
104 // static
105 // TODO(taylori): Add a test for tracing paths in integration tests.
Create(base::TaskRunner * runner,Observer * observer)106 std::unique_ptr<FtraceController> FtraceController::Create(
107     base::TaskRunner* runner,
108     Observer* observer) {
109   size_t index = 0;
110   std::unique_ptr<FtraceProcfs> ftrace_procfs = nullptr;
111   while (!ftrace_procfs && kTracingPaths[index]) {
112     ftrace_procfs = FtraceProcfs::Create(kTracingPaths[index++]);
113   }
114 
115   if (!ftrace_procfs)
116     return nullptr;
117 
118   auto table = ProtoTranslationTable::Create(
119       ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
120 
121   if (!table)
122     return nullptr;
123 
124   std::unique_ptr<FtraceConfigMuxer> model = std::unique_ptr<FtraceConfigMuxer>(
125       new FtraceConfigMuxer(ftrace_procfs.get(), table.get()));
126   return std::unique_ptr<FtraceController>(
127       new FtraceController(std::move(ftrace_procfs), std::move(table),
128                            std::move(model), runner, observer));
129 }
130 
FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,std::unique_ptr<ProtoTranslationTable> table,std::unique_ptr<FtraceConfigMuxer> model,base::TaskRunner * task_runner,Observer * observer)131 FtraceController::FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,
132                                    std::unique_ptr<ProtoTranslationTable> table,
133                                    std::unique_ptr<FtraceConfigMuxer> model,
134                                    base::TaskRunner* task_runner,
135                                    Observer* observer)
136     : task_runner_(task_runner),
137       observer_(observer),
138       thread_sync_(task_runner),
139       ftrace_procfs_(std::move(ftrace_procfs)),
140       table_(std::move(table)),
141       ftrace_config_muxer_(std::move(model)),
142       weak_factory_(this) {
143   thread_sync_.trace_controller_weak = GetWeakPtr();
144 }
145 
~FtraceController()146 FtraceController::~FtraceController() {
147   PERFETTO_DCHECK_THREAD(thread_checker_);
148   for (const auto* data_source : data_sources_)
149     ftrace_config_muxer_->RemoveConfig(data_source->config_id());
150   data_sources_.clear();
151   started_data_sources_.clear();
152   StopIfNeeded();
153 }
154 
NowMs() const155 uint64_t FtraceController::NowMs() const {
156   return static_cast<uint64_t>(base::GetWallTimeMs().count());
157 }
158 
159 // The OnCpuReader* methods below are called on the CpuReader worker threads.
160 // Lifetime is guaranteed to be valid, because the FtraceController dtor
161 // (that happens on the main thread) joins the worker threads.
162 
163 // static
OnCpuReaderRead(size_t cpu,int generation,FtraceThreadSync * thread_sync)164 void FtraceController::OnCpuReaderRead(size_t cpu,
165                                        int generation,
166                                        FtraceThreadSync* thread_sync) {
167   PERFETTO_METATRACE("OnCpuReaderRead()", cpu);
168 
169   {
170     std::lock_guard<std::mutex> lock(thread_sync->mutex);
171     // If this was the first CPU to wake up, schedule a drain for the next
172     // drain interval.
173     bool post_drain_task = thread_sync->cpus_to_drain.none();
174     thread_sync->cpus_to_drain[cpu] = true;
175     if (!post_drain_task)
176       return;
177   }  // lock(thread_sync_.mutex)
178 
179   base::WeakPtr<FtraceController> weak_ctl = thread_sync->trace_controller_weak;
180   base::TaskRunner* task_runner = thread_sync->task_runner;
181 
182   // The nested PostTask is used because the FtraceController (and hence
183   // GetDrainPeriodMs()) can be called only on the main thread.
184   task_runner->PostTask([weak_ctl, task_runner, generation] {
185 
186     if (!weak_ctl)
187       return;
188     uint32_t drain_period_ms = weak_ctl->GetDrainPeriodMs();
189 
190     task_runner->PostDelayedTask(
191         [weak_ctl, generation] {
192           if (weak_ctl)
193             weak_ctl->DrainCPUs(generation);
194         },
195         drain_period_ms - (weak_ctl->NowMs() % drain_period_ms));
196 
197   });
198 }
199 
200 // static
OnCpuReaderFlush(size_t cpu,int generation,FtraceThreadSync * thread_sync)201 void FtraceController::OnCpuReaderFlush(size_t cpu,
202                                         int generation,
203                                         FtraceThreadSync* thread_sync) {
204   // In the case of a flush, we want to drain the data as quickly as possible to
205   // minimize the flush latency, at the cost of more tasks / wakeups (eventually
206   // one task per cpu). Flushes are not supposed to happen too frequently.
207   {
208     std::lock_guard<std::mutex> lock(thread_sync->mutex);
209     thread_sync->cpus_to_drain[cpu] = true;
210     thread_sync->flush_acks[cpu] = true;
211   }  // lock(thread_sync_.mutex)
212 
213   base::WeakPtr<FtraceController> weak_ctl = thread_sync->trace_controller_weak;
214   thread_sync->task_runner->PostTask([weak_ctl, generation] {
215     if (weak_ctl)
216       weak_ctl->DrainCPUs(generation);
217   });
218 }
219 
DrainCPUs(int generation)220 void FtraceController::DrainCPUs(int generation) {
221   PERFETTO_DCHECK_THREAD(thread_checker_);
222   PERFETTO_METATRACE("DrainCPUs()", base::MetaTrace::kMainThreadCpu);
223 
224   if (generation != generation_)
225     return;
226 
227   const size_t num_cpus = ftrace_procfs_->NumberOfCpus();
228   PERFETTO_DCHECK(cpu_readers_.size() == num_cpus);
229   FlushRequestID ack_flush_request_id = 0;
230   std::bitset<base::kMaxCpus> cpus_to_drain;
231   {
232     std::lock_guard<std::mutex> lock(thread_sync_.mutex);
233     std::swap(cpus_to_drain, thread_sync_.cpus_to_drain);
234 
235     // Check also if a flush is pending and if all cpus have acked. If that's
236     // the case, ack the overall Flush() request at the end of this function.
237     if (cur_flush_request_id_ && thread_sync_.flush_acks.count() >= num_cpus) {
238       thread_sync_.flush_acks.reset();
239       ack_flush_request_id = cur_flush_request_id_;
240       cur_flush_request_id_ = 0;
241     }
242   }
243 
244   for (size_t cpu = 0; cpu < num_cpus; cpu++) {
245     if (!cpus_to_drain[cpu])
246       continue;
247     // This method reads the pipe and converts the raw ftrace data into
248     // protobufs using the |data_source|'s TraceWriter.
249     cpu_readers_[cpu]->Drain(started_data_sources_);
250     OnDrainCpuForTesting(cpu);
251   }
252 
253   // If we filled up any SHM pages while draining the data, we will have posted
254   // a task to notify traced about this. Only unblock the readers after this
255   // notification is sent to make it less likely that they steal CPU time away
256   // from traced. Also, don't unblock the readers until all of them have replied
257   // to the flush.
258   if (!cur_flush_request_id_) {
259     base::WeakPtr<FtraceController> weak_this = weak_factory_.GetWeakPtr();
260     task_runner_->PostTask([weak_this] {
261       if (weak_this)
262         weak_this->UnblockReaders();
263     });
264   }
265 
266   observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
267 
268   if (ack_flush_request_id) {
269     // Flush completed, all CpuReader(s) acked.
270 
271     IssueThreadSyncCmd(FtraceThreadSync::kRun);  // Switch back to reading mode.
272 
273     // This will call FtraceDataSource::OnFtraceFlushComplete(), which in turn
274     // will flush the userspace buffers and ack the flush to the ProbesProducer
275     // which in turn will ack the flush to the tracing service.
276     NotifyFlushCompleteToStartedDataSources(ack_flush_request_id);
277   }
278 }
279 
UnblockReaders()280 void FtraceController::UnblockReaders() {
281   PERFETTO_METATRACE("UnblockReaders()", base::MetaTrace::kMainThreadCpu);
282 
283   // If a flush or a quit is pending, do nothing.
284   std::unique_lock<std::mutex> lock(thread_sync_.mutex);
285   if (thread_sync_.cmd != FtraceThreadSync::kRun)
286     return;
287 
288   // Unblock all waiting readers to start moving more data into their
289   // respective staging pipes.
290   IssueThreadSyncCmd(FtraceThreadSync::kRun, std::move(lock));
291 }
292 
StartIfNeeded()293 void FtraceController::StartIfNeeded() {
294   if (started_data_sources_.size() > 1)
295     return;
296   PERFETTO_DCHECK(!started_data_sources_.empty());
297   PERFETTO_DCHECK(cpu_readers_.empty());
298   base::WeakPtr<FtraceController> weak_this = weak_factory_.GetWeakPtr();
299 
300   {
301     std::lock_guard<std::mutex> lock(thread_sync_.mutex);
302     thread_sync_.cpus_to_drain.reset();
303     thread_sync_.cmd = FtraceThreadSync::kRun;
304     thread_sync_.cmd_id++;
305   }
306 
307   generation_++;
308   cpu_readers_.clear();
309   cpu_readers_.reserve(ftrace_procfs_->NumberOfCpus());
310   for (size_t cpu = 0; cpu < ftrace_procfs_->NumberOfCpus(); cpu++) {
311     cpu_readers_.emplace_back(
312         new CpuReader(table_.get(), &thread_sync_, cpu, generation_,
313                       ftrace_procfs_->OpenPipeForCpu(cpu)));
314   }
315 }
316 
GetDrainPeriodMs()317 uint32_t FtraceController::GetDrainPeriodMs() {
318   if (data_sources_.empty())
319     return kDefaultDrainPeriodMs;
320   uint32_t min_drain_period_ms = kMaxDrainPeriodMs + 1;
321   for (const FtraceDataSource* data_source : data_sources_) {
322     if (data_source->config().drain_period_ms() < min_drain_period_ms)
323       min_drain_period_ms = data_source->config().drain_period_ms();
324   }
325   return ClampDrainPeriodMs(min_drain_period_ms);
326 }
327 
ClearTrace()328 void FtraceController::ClearTrace() {
329   ftrace_procfs_->ClearTrace();
330 }
331 
DisableAllEvents()332 void FtraceController::DisableAllEvents() {
333   ftrace_procfs_->DisableAllEvents();
334 }
335 
WriteTraceMarker(const std::string & s)336 void FtraceController::WriteTraceMarker(const std::string& s) {
337   ftrace_procfs_->WriteTraceMarker(s);
338 }
339 
Flush(FlushRequestID flush_id)340 void FtraceController::Flush(FlushRequestID flush_id) {
341   PERFETTO_DCHECK_THREAD(thread_checker_);
342 
343   if (flush_id == cur_flush_request_id_)
344     return;  // Already dealing with this flush request.
345 
346   cur_flush_request_id_ = flush_id;
347   {
348     std::unique_lock<std::mutex> lock(thread_sync_.mutex);
349     thread_sync_.flush_acks.reset();
350     IssueThreadSyncCmd(FtraceThreadSync::kFlush, std::move(lock));
351   }
352 
353   base::WeakPtr<FtraceController> weak_this = weak_factory_.GetWeakPtr();
354   task_runner_->PostDelayedTask(
355       [weak_this, flush_id] {
356         if (weak_this)
357           weak_this->OnFlushTimeout(flush_id);
358       },
359       kControllerFlushTimeoutMs);
360 }
361 
OnFlushTimeout(FlushRequestID flush_request_id)362 void FtraceController::OnFlushTimeout(FlushRequestID flush_request_id) {
363   if (flush_request_id != cur_flush_request_id_)
364     return;
365 
366   std::string acks;  // For debugging purposes only.
367   {
368     // Unlock the cpu readers and move on.
369     std::unique_lock<std::mutex> lock(thread_sync_.mutex);
370     acks = thread_sync_.flush_acks.to_string();
371     thread_sync_.flush_acks.reset();
372     if (thread_sync_.cmd == FtraceThreadSync::kFlush)
373       IssueThreadSyncCmd(FtraceThreadSync::kRun, std::move(lock));
374   }
375 
376   PERFETTO_ELOG("Ftrace flush(%" PRIu64 ") timed out. Acked cpus mask: [%s]",
377                 flush_request_id, acks.c_str());
378   cur_flush_request_id_ = 0;
379   NotifyFlushCompleteToStartedDataSources(flush_request_id);
380 }
381 
StopIfNeeded()382 void FtraceController::StopIfNeeded() {
383   if (!started_data_sources_.empty())
384     return;
385 
386   // We are not implicitly flushing on Stop. The tracing service is supposed to
387   // ask for an explicit flush before stopping, unless it needs to perform a
388   // non-graceful stop.
389 
390   IssueThreadSyncCmd(FtraceThreadSync::kQuit);
391 
392   // Destroying the CpuReader(s) will join on their worker threads.
393   cpu_readers_.clear();
394   generation_++;
395 }
396 
AddDataSource(FtraceDataSource * data_source)397 bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
398   PERFETTO_DCHECK_THREAD(thread_checker_);
399   if (!ValidConfig(data_source->config()))
400     return false;
401 
402   auto config_id = ftrace_config_muxer_->SetupConfig(data_source->config());
403   if (!config_id)
404     return false;
405 
406   const EventFilter* filter = ftrace_config_muxer_->GetEventFilter(config_id);
407   auto it_and_inserted = data_sources_.insert(data_source);
408   PERFETTO_DCHECK(it_and_inserted.second);
409   data_source->Initialize(config_id, filter);
410   return true;
411 }
412 
StartDataSource(FtraceDataSource * data_source)413 bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
414   PERFETTO_DCHECK_THREAD(thread_checker_);
415   PERFETTO_DCHECK(data_sources_.count(data_source) > 0);
416 
417   FtraceConfigId config_id = data_source->config_id();
418   PERFETTO_CHECK(config_id);
419 
420   if (!ftrace_config_muxer_->ActivateConfig(config_id))
421     return false;
422 
423   started_data_sources_.insert(data_source);
424   StartIfNeeded();
425   return true;
426 }
427 
RemoveDataSource(FtraceDataSource * data_source)428 void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
429   PERFETTO_DCHECK_THREAD(thread_checker_);
430   started_data_sources_.erase(data_source);
431   size_t removed = data_sources_.erase(data_source);
432   if (!removed)
433     return;  // Can happen if AddDataSource failed (e.g. too many sessions).
434   ftrace_config_muxer_->RemoveConfig(data_source->config_id());
435   StopIfNeeded();
436 }
437 
DumpFtraceStats(FtraceStats * stats)438 void FtraceController::DumpFtraceStats(FtraceStats* stats) {
439   DumpAllCpuStats(ftrace_procfs_.get(), stats);
440 }
441 
IssueThreadSyncCmd(FtraceThreadSync::Cmd cmd,std::unique_lock<std::mutex> pass_lock_from_caller)442 void FtraceController::IssueThreadSyncCmd(
443     FtraceThreadSync::Cmd cmd,
444     std::unique_lock<std::mutex> pass_lock_from_caller) {
445   PERFETTO_DCHECK_THREAD(thread_checker_);
446   {
447     std::unique_lock<std::mutex> lock(std::move(pass_lock_from_caller));
448     if (!lock.owns_lock())
449       lock = std::unique_lock<std::mutex>(thread_sync_.mutex);
450 
451     if (thread_sync_.cmd == FtraceThreadSync::kQuit &&
452         cmd != FtraceThreadSync::kQuit) {
453       // If in kQuit state, we should never issue any other commands.
454       return;
455     }
456 
457     thread_sync_.cmd = cmd;
458     thread_sync_.cmd_id++;
459   }
460 
461   // Send a SIGPIPE to all worker threads to wake them up if they are sitting in
462   // a blocking splice(). If they are not and instead they are sitting in the
463   // cond-variable.wait(), this, together with the one below, will have at best
464   // the same effect of a spurious wakeup, depending on the implementation of
465   // the condition variable.
466   for (const auto& cpu_reader : cpu_readers_)
467     cpu_reader->InterruptWorkerThreadWithSignal();
468 
469   thread_sync_.cond.notify_all();
470 }
471 
NotifyFlushCompleteToStartedDataSources(FlushRequestID flush_request_id)472 void FtraceController::NotifyFlushCompleteToStartedDataSources(
473     FlushRequestID flush_request_id) {
474   PERFETTO_DCHECK_THREAD(thread_checker_);
475   for (FtraceDataSource* data_source : started_data_sources_)
476     data_source->OnFtraceFlushComplete(flush_request_id);
477 }
478 
479 FtraceController::Observer::~Observer() = default;
480 
481 }  // namespace perfetto
482