1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/traced/probes/probes_producer.h"
18 
19 #include <stdio.h>
20 #include <sys/stat.h>
21 
22 #include <algorithm>
23 #include <queue>
24 #include <string>
25 
26 #include "perfetto/base/logging.h"
27 #include "perfetto/base/utils.h"
28 #include "perfetto/base/weak_ptr.h"
29 #include "perfetto/traced/traced.h"
30 #include "perfetto/tracing/core/data_source_config.h"
31 #include "perfetto/tracing/core/data_source_descriptor.h"
32 #include "perfetto/tracing/core/ftrace_config.h"
33 #include "perfetto/tracing/core/trace_config.h"
34 #include "perfetto/tracing/core/trace_packet.h"
35 #include "perfetto/tracing/ipc/producer_ipc_client.h"
36 #include "src/traced/probes/android_log/android_log_data_source.h"
37 #include "src/traced/probes/filesystem/inode_file_data_source.h"
38 #include "src/traced/probes/ftrace/ftrace_data_source.h"
39 #include "src/traced/probes/packages_list/packages_list_data_source.h"
40 #include "src/traced/probes/power/android_power_data_source.h"
41 #include "src/traced/probes/probes_data_source.h"
42 #include "src/traced/probes/ps/process_stats_data_source.h"
43 #include "src/traced/probes/sys_stats/sys_stats_data_source.h"
44 
45 #include "perfetto/trace/filesystem/inode_file_map.pbzero.h"
46 #include "perfetto/trace/ftrace/ftrace_event_bundle.pbzero.h"
47 #include "perfetto/trace/ftrace/ftrace_stats.pbzero.h"
48 #include "perfetto/trace/trace_packet.pbzero.h"
49 
50 namespace perfetto {
51 namespace {
52 
53 constexpr uint32_t kInitialConnectionBackoffMs = 100;
54 constexpr uint32_t kMaxConnectionBackoffMs = 30 * 1000;
55 
56 // Should be larger than FtraceController::kControllerFlushTimeoutMs.
57 constexpr uint32_t kFlushTimeoutMs = 1000;
58 
59 constexpr char kFtraceSourceName[] = "linux.ftrace";
60 constexpr char kProcessStatsSourceName[] = "linux.process_stats";
61 constexpr char kInodeMapSourceName[] = "linux.inode_file_map";
62 constexpr char kSysStatsSourceName[] = "linux.sys_stats";
63 constexpr char kAndroidPowerSourceName[] = "android.power";
64 constexpr char kAndroidLogSourceName[] = "android.log";
65 constexpr char kPackagesListSourceName[] = "android.packages_list";
66 
67 }  // namespace.
68 
69 // State transition diagram:
70 //                    +----------------------------+
71 //                    v                            +
72 // NotStarted -> NotConnected -> Connecting -> Connected
73 //                    ^              +
74 //                    +--------------+
75 //
76 
ProbesProducer()77 ProbesProducer::ProbesProducer() : weak_factory_(this) {}
~ProbesProducer()78 ProbesProducer::~ProbesProducer() {
79   // The ftrace data sources must be deleted before the ftrace controller.
80   data_sources_.clear();
81   ftrace_.reset();
82 }
83 
OnConnect()84 void ProbesProducer::OnConnect() {
85   PERFETTO_DCHECK(state_ == kConnecting);
86   state_ = kConnected;
87   ResetConnectionBackoff();
88   PERFETTO_LOG("Connected to the service");
89 
90   {
91     DataSourceDescriptor desc;
92     desc.set_name(kFtraceSourceName);
93     endpoint_->RegisterDataSource(desc);
94   }
95 
96   {
97     DataSourceDescriptor desc;
98     desc.set_name(kProcessStatsSourceName);
99     desc.set_handles_incremental_state_clear(true);
100     endpoint_->RegisterDataSource(desc);
101   }
102 
103   {
104     DataSourceDescriptor desc;
105     desc.set_name(kInodeMapSourceName);
106     endpoint_->RegisterDataSource(desc);
107   }
108 
109   {
110     DataSourceDescriptor desc;
111     desc.set_name(kSysStatsSourceName);
112     endpoint_->RegisterDataSource(desc);
113   }
114 
115   {
116     DataSourceDescriptor desc;
117     desc.set_name(kAndroidPowerSourceName);
118     endpoint_->RegisterDataSource(desc);
119   }
120 
121   {
122     DataSourceDescriptor desc;
123     desc.set_name(kAndroidLogSourceName);
124     endpoint_->RegisterDataSource(desc);
125   }
126 
127   {
128     DataSourceDescriptor desc;
129     desc.set_name(kPackagesListSourceName);
130     endpoint_->RegisterDataSource(desc);
131   }
132 }
133 
OnDisconnect()134 void ProbesProducer::OnDisconnect() {
135   PERFETTO_DCHECK(state_ == kConnected || state_ == kConnecting);
136   PERFETTO_LOG("Disconnected from tracing service");
137   if (state_ == kConnected)
138     return task_runner_->PostTask([this] { this->Restart(); });
139 
140   state_ = kNotConnected;
141   IncreaseConnectionBackoff();
142   task_runner_->PostDelayedTask([this] { this->Connect(); },
143                                 connection_backoff_ms_);
144 }
145 
Restart()146 void ProbesProducer::Restart() {
147   // We lost the connection with the tracing service. At this point we need
148   // to reset all the data sources. Trying to handle that manually is going to
149   // be error prone. What we do here is simply desroying the instance and
150   // recreating it again.
151   // TODO(hjd): Add e2e test for this.
152 
153   base::TaskRunner* task_runner = task_runner_;
154   const char* socket_name = socket_name_;
155 
156   // Invoke destructor and then the constructor again.
157   this->~ProbesProducer();
158   new (this) ProbesProducer();
159 
160   ConnectWithRetries(socket_name, task_runner);
161 }
162 
SetupDataSource(DataSourceInstanceID instance_id,const DataSourceConfig & config)163 void ProbesProducer::SetupDataSource(DataSourceInstanceID instance_id,
164                                      const DataSourceConfig& config) {
165   PERFETTO_DLOG("SetupDataSource(id=%" PRIu64 ", name=%s)", instance_id,
166                 config.name().c_str());
167   PERFETTO_DCHECK(data_sources_.count(instance_id) == 0);
168   TracingSessionID session_id = config.tracing_session_id();
169   PERFETTO_CHECK(session_id > 0);
170 
171   std::unique_ptr<ProbesDataSource> data_source;
172   if (config.name() == kFtraceSourceName) {
173     data_source = CreateFtraceDataSource(session_id, config);
174   } else if (config.name() == kInodeMapSourceName) {
175     data_source = CreateInodeFileDataSource(session_id, config);
176   } else if (config.name() == kProcessStatsSourceName) {
177     data_source = CreateProcessStatsDataSource(session_id, config);
178   } else if (config.name() == kSysStatsSourceName) {
179     data_source = CreateSysStatsDataSource(session_id, config);
180   } else if (config.name() == kAndroidPowerSourceName) {
181     data_source = CreateAndroidPowerDataSource(session_id, config);
182   } else if (config.name() == kAndroidLogSourceName) {
183     data_source = CreateAndroidLogDataSource(session_id, config);
184   } else if (config.name() == kPackagesListSourceName) {
185     data_source = CreatePackagesListDataSource(session_id, config);
186   }
187 
188   if (!data_source) {
189     PERFETTO_ELOG("Failed to create data source '%s'", config.name().c_str());
190     return;
191   }
192 
193   session_data_sources_.emplace(session_id, data_source.get());
194   data_sources_[instance_id] = std::move(data_source);
195 }
196 
StartDataSource(DataSourceInstanceID instance_id,const DataSourceConfig & config)197 void ProbesProducer::StartDataSource(DataSourceInstanceID instance_id,
198                                      const DataSourceConfig& config) {
199   PERFETTO_DLOG("StartDataSource(id=%" PRIu64 ", name=%s)", instance_id,
200                 config.name().c_str());
201   auto it = data_sources_.find(instance_id);
202   if (it == data_sources_.end()) {
203     // Can happen if SetupDataSource() failed (e.g. ftrace was busy).
204     PERFETTO_ELOG("Data source id=%" PRIu64 " not found", instance_id);
205     return;
206   }
207   ProbesDataSource* data_source = it->second.get();
208   if (data_source->started)
209     return;
210   if (config.trace_duration_ms() != 0) {
211     uint32_t timeout = 5000 + 2 * config.trace_duration_ms();
212     watchdogs_.emplace(
213         instance_id, base::Watchdog::GetInstance()->CreateFatalTimer(timeout));
214   }
215   data_source->started = true;
216   data_source->Start();
217 }
218 
CreateFtraceDataSource(TracingSessionID session_id,const DataSourceConfig & config)219 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateFtraceDataSource(
220     TracingSessionID session_id,
221     const DataSourceConfig& config) {
222   // Don't retry if FtraceController::Create() failed once.
223   // This can legitimately happen on user builds where we cannot access the
224   // debug paths, e.g., because of SELinux rules.
225   if (ftrace_creation_failed_)
226     return nullptr;
227 
228   // Lazily create on the first instance.
229   if (!ftrace_) {
230     ftrace_ = FtraceController::Create(task_runner_, this);
231 
232     if (!ftrace_) {
233       PERFETTO_ELOG("Failed to create FtraceController");
234       ftrace_creation_failed_ = true;
235       return nullptr;
236     }
237 
238     ftrace_->DisableAllEvents();
239     ftrace_->ClearTrace();
240   }
241 
242   PERFETTO_LOG("Ftrace setup (target_buf=%" PRIu32 ")", config.target_buffer());
243   const BufferID buffer_id = static_cast<BufferID>(config.target_buffer());
244   std::unique_ptr<FtraceDataSource> data_source(new FtraceDataSource(
245       ftrace_->GetWeakPtr(), session_id, config.ftrace_config(),
246       endpoint_->CreateTraceWriter(buffer_id)));
247   if (!ftrace_->AddDataSource(data_source.get())) {
248     PERFETTO_ELOG(
249         "Failed to setup tracing (too many concurrent sessions or ftrace is "
250         "already in use)");
251     return nullptr;
252   }
253   return std::move(data_source);
254 }
255 
CreateInodeFileDataSource(TracingSessionID session_id,DataSourceConfig source_config)256 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateInodeFileDataSource(
257     TracingSessionID session_id,
258     DataSourceConfig source_config) {
259   PERFETTO_LOG("Inode file map setup (target_buf=%" PRIu32 ")",
260                source_config.target_buffer());
261   auto buffer_id = static_cast<BufferID>(source_config.target_buffer());
262   if (system_inodes_.empty())
263     CreateStaticDeviceToInodeMap("/system", &system_inodes_);
264   return std::unique_ptr<InodeFileDataSource>(new InodeFileDataSource(
265       std::move(source_config), task_runner_, session_id, &system_inodes_,
266       &cache_, endpoint_->CreateTraceWriter(buffer_id)));
267 }
268 
CreateProcessStatsDataSource(TracingSessionID session_id,const DataSourceConfig & config)269 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateProcessStatsDataSource(
270     TracingSessionID session_id,
271     const DataSourceConfig& config) {
272   auto buffer_id = static_cast<BufferID>(config.target_buffer());
273   return std::unique_ptr<ProcessStatsDataSource>(new ProcessStatsDataSource(
274       task_runner_, session_id, endpoint_->CreateTraceWriter(buffer_id),
275       config));
276 }
277 
CreateAndroidPowerDataSource(TracingSessionID session_id,const DataSourceConfig & config)278 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateAndroidPowerDataSource(
279     TracingSessionID session_id,
280     const DataSourceConfig& config) {
281   auto buffer_id = static_cast<BufferID>(config.target_buffer());
282   return std::unique_ptr<ProbesDataSource>(
283       new AndroidPowerDataSource(config, task_runner_, session_id,
284                                  endpoint_->CreateTraceWriter(buffer_id)));
285 }
286 
CreateAndroidLogDataSource(TracingSessionID session_id,const DataSourceConfig & config)287 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateAndroidLogDataSource(
288     TracingSessionID session_id,
289     const DataSourceConfig& config) {
290   auto buffer_id = static_cast<BufferID>(config.target_buffer());
291   return std::unique_ptr<ProbesDataSource>(
292       new AndroidLogDataSource(config, task_runner_, session_id,
293                                endpoint_->CreateTraceWriter(buffer_id)));
294 }
295 
CreatePackagesListDataSource(TracingSessionID session_id,const DataSourceConfig & config)296 std::unique_ptr<ProbesDataSource> ProbesProducer::CreatePackagesListDataSource(
297     TracingSessionID session_id,
298     const DataSourceConfig& config) {
299   auto buffer_id = static_cast<BufferID>(config.target_buffer());
300   return std::unique_ptr<ProbesDataSource>(new PackagesListDataSource(
301       config, session_id, endpoint_->CreateTraceWriter(buffer_id)));
302 }
303 
CreateSysStatsDataSource(TracingSessionID session_id,const DataSourceConfig & config)304 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateSysStatsDataSource(
305     TracingSessionID session_id,
306     const DataSourceConfig& config) {
307   auto buffer_id = static_cast<BufferID>(config.target_buffer());
308   return std::unique_ptr<SysStatsDataSource>(
309       new SysStatsDataSource(task_runner_, session_id,
310                              endpoint_->CreateTraceWriter(buffer_id), config));
311 }
312 
StopDataSource(DataSourceInstanceID id)313 void ProbesProducer::StopDataSource(DataSourceInstanceID id) {
314   PERFETTO_LOG("Producer stop (id=%" PRIu64 ")", id);
315   auto it = data_sources_.find(id);
316   if (it == data_sources_.end()) {
317     // Can happen if SetupDataSource() failed (e.g. ftrace was busy).
318     PERFETTO_ELOG("Cannot stop data source id=%" PRIu64 ", not found", id);
319     return;
320   }
321   ProbesDataSource* data_source = it->second.get();
322   TracingSessionID session_id = data_source->tracing_session_id;
323   auto range = session_data_sources_.equal_range(session_id);
324   for (auto kv = range.first; kv != range.second; kv++) {
325     if (kv->second != data_source)
326       continue;
327     session_data_sources_.erase(kv);
328     break;
329   }
330   data_sources_.erase(it);
331   watchdogs_.erase(id);
332 }
333 
OnTracingSetup()334 void ProbesProducer::OnTracingSetup() {}
335 
Flush(FlushRequestID flush_request_id,const DataSourceInstanceID * data_source_ids,size_t num_data_sources)336 void ProbesProducer::Flush(FlushRequestID flush_request_id,
337                            const DataSourceInstanceID* data_source_ids,
338                            size_t num_data_sources) {
339   PERFETTO_DCHECK(flush_request_id);
340   auto weak_this = weak_factory_.GetWeakPtr();
341 
342   // Issue a Flush() to all started data sources.
343   bool flush_queued = false;
344   for (size_t i = 0; i < num_data_sources; i++) {
345     DataSourceInstanceID ds_id = data_source_ids[i];
346     auto it = data_sources_.find(ds_id);
347     if (it == data_sources_.end() || !it->second->started)
348       continue;
349     pending_flushes_.emplace(flush_request_id, ds_id);
350     flush_queued = true;
351     auto flush_callback = [weak_this, flush_request_id, ds_id] {
352       if (weak_this)
353         weak_this->OnDataSourceFlushComplete(flush_request_id, ds_id);
354     };
355     it->second->Flush(flush_request_id, flush_callback);
356   }
357 
358   // If there is nothing to flush, ack immediately.
359   if (!flush_queued) {
360     endpoint_->NotifyFlushComplete(flush_request_id);
361     return;
362   }
363 
364   // Otherwise, post the timeout task.
365   task_runner_->PostDelayedTask(
366       [weak_this, flush_request_id] {
367         if (weak_this)
368           weak_this->OnFlushTimeout(flush_request_id);
369       },
370       kFlushTimeoutMs);
371 }
372 
OnDataSourceFlushComplete(FlushRequestID flush_request_id,DataSourceInstanceID ds_id)373 void ProbesProducer::OnDataSourceFlushComplete(FlushRequestID flush_request_id,
374                                                DataSourceInstanceID ds_id) {
375   PERFETTO_DLOG("Flush %" PRIu64 " acked by data source %" PRIu64,
376                 flush_request_id, ds_id);
377   auto range = pending_flushes_.equal_range(flush_request_id);
378   for (auto it = range.first; it != range.second; it++) {
379     if (it->second == ds_id) {
380       pending_flushes_.erase(it);
381       break;
382     }
383   }
384 
385   if (pending_flushes_.count(flush_request_id))
386     return;  // Still waiting for other data sources to ack.
387 
388   PERFETTO_DLOG("All data sources acked to flush %" PRIu64, flush_request_id);
389   endpoint_->NotifyFlushComplete(flush_request_id);
390 }
391 
OnFlushTimeout(FlushRequestID flush_request_id)392 void ProbesProducer::OnFlushTimeout(FlushRequestID flush_request_id) {
393   if (pending_flushes_.count(flush_request_id) == 0)
394     return;  // All acked.
395   PERFETTO_ELOG("Flush(%" PRIu64 ") timed out", flush_request_id);
396   pending_flushes_.erase(flush_request_id);
397   endpoint_->NotifyFlushComplete(flush_request_id);
398 }
399 
ClearIncrementalState(const DataSourceInstanceID * data_source_ids,size_t num_data_sources)400 void ProbesProducer::ClearIncrementalState(
401     const DataSourceInstanceID* data_source_ids,
402     size_t num_data_sources) {
403   for (size_t i = 0; i < num_data_sources; i++) {
404     DataSourceInstanceID ds_id = data_source_ids[i];
405     auto it = data_sources_.find(ds_id);
406     if (it == data_sources_.end() || !it->second->started)
407       continue;
408 
409     it->second->ClearIncrementalState();
410   }
411 }
412 
413 // This function is called by the FtraceController in batches, whenever it has
414 // read one or more pages from one or more cpus and written that into the
415 // userspace tracing buffer. If more than one ftrace data sources are active,
416 // this call typically happens after writing for all session has been handled.
OnFtraceDataWrittenIntoDataSourceBuffers()417 void ProbesProducer::OnFtraceDataWrittenIntoDataSourceBuffers() {
418   TracingSessionID last_session_id = 0;
419   FtraceMetadata* metadata = nullptr;
420   InodeFileDataSource* inode_data_source = nullptr;
421   ProcessStatsDataSource* ps_data_source = nullptr;
422 
423   // unordered_multimap guarantees that entries with the same key are contiguous
424   // in the iteration.
425   for (auto it = session_data_sources_.begin(); /* check below*/; it++) {
426     // If this is the last iteration or the session id has changed,
427     // dispatch the metadata update to the linked data sources, if any.
428     if (it == session_data_sources_.end() || it->first != last_session_id) {
429       bool has_inodes = metadata && !metadata->inode_and_device.empty();
430       bool has_pids = metadata && !metadata->pids.empty();
431       bool has_rename_pids = metadata && !metadata->rename_pids.empty();
432       if (has_inodes && inode_data_source)
433         inode_data_source->OnInodes(metadata->inode_and_device);
434       // Ordering the rename pids before the seen pids is important so that any
435       // renamed processes get scraped in the OnPids call.
436       if (has_rename_pids && ps_data_source)
437         ps_data_source->OnRenamePids(metadata->rename_pids);
438       if (has_pids && ps_data_source)
439         ps_data_source->OnPids(metadata->pids);
440       if (metadata)
441         metadata->Clear();
442       metadata = nullptr;
443       inode_data_source = nullptr;
444       ps_data_source = nullptr;
445       if (it == session_data_sources_.end())
446         break;
447       last_session_id = it->first;
448     }
449     ProbesDataSource* ds = it->second;
450     if (!ds->started)
451       continue;
452     switch (ds->type_id) {
453       case FtraceDataSource::kTypeId:
454         metadata = static_cast<FtraceDataSource*>(ds)->mutable_metadata();
455         break;
456       case InodeFileDataSource::kTypeId:
457         inode_data_source = static_cast<InodeFileDataSource*>(ds);
458         break;
459       case ProcessStatsDataSource::kTypeId: {
460         // A trace session might have declared more than one ps data source.
461         // In those cases we often use one for a full dump on startup (
462         // targeting a dedicated buffer) and another one for on-demand dumps
463         // targeting the main buffer.
464         // Only use the one that has on-demand dumps enabled, if any.
465         auto ps = static_cast<ProcessStatsDataSource*>(ds);
466         if (ps->on_demand_dumps_enabled())
467           ps_data_source = ps;
468         break;
469       }
470       case SysStatsDataSource::kTypeId:
471       case AndroidLogDataSource::kTypeId:
472       case PackagesListDataSource::kTypeId:
473         break;
474       default:
475         PERFETTO_DFATAL("Invalid data source.");
476     }  // switch (type_id)
477   }    // for (session_data_sources_)
478 }
479 
ConnectWithRetries(const char * socket_name,base::TaskRunner * task_runner)480 void ProbesProducer::ConnectWithRetries(const char* socket_name,
481                                         base::TaskRunner* task_runner) {
482   PERFETTO_DCHECK(state_ == kNotStarted);
483   state_ = kNotConnected;
484 
485   ResetConnectionBackoff();
486   socket_name_ = socket_name;
487   task_runner_ = task_runner;
488   Connect();
489 }
490 
Connect()491 void ProbesProducer::Connect() {
492   PERFETTO_DCHECK(state_ == kNotConnected);
493   state_ = kConnecting;
494   endpoint_ = ProducerIPCClient::Connect(
495       socket_name_, this, "perfetto.traced_probes", task_runner_);
496 }
497 
IncreaseConnectionBackoff()498 void ProbesProducer::IncreaseConnectionBackoff() {
499   connection_backoff_ms_ *= 2;
500   if (connection_backoff_ms_ > kMaxConnectionBackoffMs)
501     connection_backoff_ms_ = kMaxConnectionBackoffMs;
502 }
503 
ResetConnectionBackoff()504 void ProbesProducer::ResetConnectionBackoff() {
505   connection_backoff_ms_ = kInitialConnectionBackoffMs;
506 }
507 
508 }  // namespace perfetto
509