/* * Copyright (C) 2018 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "src/traced/probes/probes_producer.h" #include #include #include #include #include #include "perfetto/base/logging.h" #include "perfetto/base/utils.h" #include "perfetto/base/weak_ptr.h" #include "perfetto/traced/traced.h" #include "perfetto/tracing/core/data_source_config.h" #include "perfetto/tracing/core/data_source_descriptor.h" #include "perfetto/tracing/core/ftrace_config.h" #include "perfetto/tracing/core/trace_config.h" #include "perfetto/tracing/core/trace_packet.h" #include "perfetto/tracing/ipc/producer_ipc_client.h" #include "src/traced/probes/android_log/android_log_data_source.h" #include "src/traced/probes/filesystem/inode_file_data_source.h" #include "src/traced/probes/ftrace/ftrace_data_source.h" #include "src/traced/probes/packages_list/packages_list_data_source.h" #include "src/traced/probes/power/android_power_data_source.h" #include "src/traced/probes/probes_data_source.h" #include "src/traced/probes/ps/process_stats_data_source.h" #include "src/traced/probes/sys_stats/sys_stats_data_source.h" #include "perfetto/trace/filesystem/inode_file_map.pbzero.h" #include "perfetto/trace/ftrace/ftrace_event_bundle.pbzero.h" #include "perfetto/trace/ftrace/ftrace_stats.pbzero.h" #include "perfetto/trace/trace_packet.pbzero.h" namespace perfetto { namespace { constexpr uint32_t kInitialConnectionBackoffMs = 100; constexpr uint32_t kMaxConnectionBackoffMs = 30 * 1000; // Should be larger than FtraceController::kControllerFlushTimeoutMs. constexpr uint32_t kFlushTimeoutMs = 1000; constexpr char kFtraceSourceName[] = "linux.ftrace"; constexpr char kProcessStatsSourceName[] = "linux.process_stats"; constexpr char kInodeMapSourceName[] = "linux.inode_file_map"; constexpr char kSysStatsSourceName[] = "linux.sys_stats"; constexpr char kAndroidPowerSourceName[] = "android.power"; constexpr char kAndroidLogSourceName[] = "android.log"; constexpr char kPackagesListSourceName[] = "android.packages_list"; } // namespace. // State transition diagram: // +----------------------------+ // v + // NotStarted -> NotConnected -> Connecting -> Connected // ^ + // +--------------+ // ProbesProducer::ProbesProducer() : weak_factory_(this) {} ProbesProducer::~ProbesProducer() { // The ftrace data sources must be deleted before the ftrace controller. data_sources_.clear(); ftrace_.reset(); } void ProbesProducer::OnConnect() { PERFETTO_DCHECK(state_ == kConnecting); state_ = kConnected; ResetConnectionBackoff(); PERFETTO_LOG("Connected to the service"); { DataSourceDescriptor desc; desc.set_name(kFtraceSourceName); endpoint_->RegisterDataSource(desc); } { DataSourceDescriptor desc; desc.set_name(kProcessStatsSourceName); desc.set_handles_incremental_state_clear(true); endpoint_->RegisterDataSource(desc); } { DataSourceDescriptor desc; desc.set_name(kInodeMapSourceName); endpoint_->RegisterDataSource(desc); } { DataSourceDescriptor desc; desc.set_name(kSysStatsSourceName); endpoint_->RegisterDataSource(desc); } { DataSourceDescriptor desc; desc.set_name(kAndroidPowerSourceName); endpoint_->RegisterDataSource(desc); } { DataSourceDescriptor desc; desc.set_name(kAndroidLogSourceName); endpoint_->RegisterDataSource(desc); } { DataSourceDescriptor desc; desc.set_name(kPackagesListSourceName); endpoint_->RegisterDataSource(desc); } } void ProbesProducer::OnDisconnect() { PERFETTO_DCHECK(state_ == kConnected || state_ == kConnecting); PERFETTO_LOG("Disconnected from tracing service"); if (state_ == kConnected) return task_runner_->PostTask([this] { this->Restart(); }); state_ = kNotConnected; IncreaseConnectionBackoff(); task_runner_->PostDelayedTask([this] { this->Connect(); }, connection_backoff_ms_); } void ProbesProducer::Restart() { // We lost the connection with the tracing service. At this point we need // to reset all the data sources. Trying to handle that manually is going to // be error prone. What we do here is simply desroying the instance and // recreating it again. // TODO(hjd): Add e2e test for this. base::TaskRunner* task_runner = task_runner_; const char* socket_name = socket_name_; // Invoke destructor and then the constructor again. this->~ProbesProducer(); new (this) ProbesProducer(); ConnectWithRetries(socket_name, task_runner); } void ProbesProducer::SetupDataSource(DataSourceInstanceID instance_id, const DataSourceConfig& config) { PERFETTO_DLOG("SetupDataSource(id=%" PRIu64 ", name=%s)", instance_id, config.name().c_str()); PERFETTO_DCHECK(data_sources_.count(instance_id) == 0); TracingSessionID session_id = config.tracing_session_id(); PERFETTO_CHECK(session_id > 0); std::unique_ptr data_source; if (config.name() == kFtraceSourceName) { data_source = CreateFtraceDataSource(session_id, config); } else if (config.name() == kInodeMapSourceName) { data_source = CreateInodeFileDataSource(session_id, config); } else if (config.name() == kProcessStatsSourceName) { data_source = CreateProcessStatsDataSource(session_id, config); } else if (config.name() == kSysStatsSourceName) { data_source = CreateSysStatsDataSource(session_id, config); } else if (config.name() == kAndroidPowerSourceName) { data_source = CreateAndroidPowerDataSource(session_id, config); } else if (config.name() == kAndroidLogSourceName) { data_source = CreateAndroidLogDataSource(session_id, config); } else if (config.name() == kPackagesListSourceName) { data_source = CreatePackagesListDataSource(session_id, config); } if (!data_source) { PERFETTO_ELOG("Failed to create data source '%s'", config.name().c_str()); return; } session_data_sources_.emplace(session_id, data_source.get()); data_sources_[instance_id] = std::move(data_source); } void ProbesProducer::StartDataSource(DataSourceInstanceID instance_id, const DataSourceConfig& config) { PERFETTO_DLOG("StartDataSource(id=%" PRIu64 ", name=%s)", instance_id, config.name().c_str()); auto it = data_sources_.find(instance_id); if (it == data_sources_.end()) { // Can happen if SetupDataSource() failed (e.g. ftrace was busy). PERFETTO_ELOG("Data source id=%" PRIu64 " not found", instance_id); return; } ProbesDataSource* data_source = it->second.get(); if (data_source->started) return; if (config.trace_duration_ms() != 0) { uint32_t timeout = 5000 + 2 * config.trace_duration_ms(); watchdogs_.emplace( instance_id, base::Watchdog::GetInstance()->CreateFatalTimer(timeout)); } data_source->started = true; data_source->Start(); } std::unique_ptr ProbesProducer::CreateFtraceDataSource( TracingSessionID session_id, const DataSourceConfig& config) { // Don't retry if FtraceController::Create() failed once. // This can legitimately happen on user builds where we cannot access the // debug paths, e.g., because of SELinux rules. if (ftrace_creation_failed_) return nullptr; // Lazily create on the first instance. if (!ftrace_) { ftrace_ = FtraceController::Create(task_runner_, this); if (!ftrace_) { PERFETTO_ELOG("Failed to create FtraceController"); ftrace_creation_failed_ = true; return nullptr; } ftrace_->DisableAllEvents(); ftrace_->ClearTrace(); } PERFETTO_LOG("Ftrace setup (target_buf=%" PRIu32 ")", config.target_buffer()); const BufferID buffer_id = static_cast(config.target_buffer()); std::unique_ptr data_source(new FtraceDataSource( ftrace_->GetWeakPtr(), session_id, config.ftrace_config(), endpoint_->CreateTraceWriter(buffer_id))); if (!ftrace_->AddDataSource(data_source.get())) { PERFETTO_ELOG( "Failed to setup tracing (too many concurrent sessions or ftrace is " "already in use)"); return nullptr; } return std::move(data_source); } std::unique_ptr ProbesProducer::CreateInodeFileDataSource( TracingSessionID session_id, DataSourceConfig source_config) { PERFETTO_LOG("Inode file map setup (target_buf=%" PRIu32 ")", source_config.target_buffer()); auto buffer_id = static_cast(source_config.target_buffer()); if (system_inodes_.empty()) CreateStaticDeviceToInodeMap("/system", &system_inodes_); return std::unique_ptr(new InodeFileDataSource( std::move(source_config), task_runner_, session_id, &system_inodes_, &cache_, endpoint_->CreateTraceWriter(buffer_id))); } std::unique_ptr ProbesProducer::CreateProcessStatsDataSource( TracingSessionID session_id, const DataSourceConfig& config) { auto buffer_id = static_cast(config.target_buffer()); return std::unique_ptr(new ProcessStatsDataSource( task_runner_, session_id, endpoint_->CreateTraceWriter(buffer_id), config)); } std::unique_ptr ProbesProducer::CreateAndroidPowerDataSource( TracingSessionID session_id, const DataSourceConfig& config) { auto buffer_id = static_cast(config.target_buffer()); return std::unique_ptr( new AndroidPowerDataSource(config, task_runner_, session_id, endpoint_->CreateTraceWriter(buffer_id))); } std::unique_ptr ProbesProducer::CreateAndroidLogDataSource( TracingSessionID session_id, const DataSourceConfig& config) { auto buffer_id = static_cast(config.target_buffer()); return std::unique_ptr( new AndroidLogDataSource(config, task_runner_, session_id, endpoint_->CreateTraceWriter(buffer_id))); } std::unique_ptr ProbesProducer::CreatePackagesListDataSource( TracingSessionID session_id, const DataSourceConfig& config) { auto buffer_id = static_cast(config.target_buffer()); return std::unique_ptr(new PackagesListDataSource( config, session_id, endpoint_->CreateTraceWriter(buffer_id))); } std::unique_ptr ProbesProducer::CreateSysStatsDataSource( TracingSessionID session_id, const DataSourceConfig& config) { auto buffer_id = static_cast(config.target_buffer()); return std::unique_ptr( new SysStatsDataSource(task_runner_, session_id, endpoint_->CreateTraceWriter(buffer_id), config)); } void ProbesProducer::StopDataSource(DataSourceInstanceID id) { PERFETTO_LOG("Producer stop (id=%" PRIu64 ")", id); auto it = data_sources_.find(id); if (it == data_sources_.end()) { // Can happen if SetupDataSource() failed (e.g. ftrace was busy). PERFETTO_ELOG("Cannot stop data source id=%" PRIu64 ", not found", id); return; } ProbesDataSource* data_source = it->second.get(); TracingSessionID session_id = data_source->tracing_session_id; auto range = session_data_sources_.equal_range(session_id); for (auto kv = range.first; kv != range.second; kv++) { if (kv->second != data_source) continue; session_data_sources_.erase(kv); break; } data_sources_.erase(it); watchdogs_.erase(id); } void ProbesProducer::OnTracingSetup() {} void ProbesProducer::Flush(FlushRequestID flush_request_id, const DataSourceInstanceID* data_source_ids, size_t num_data_sources) { PERFETTO_DCHECK(flush_request_id); auto weak_this = weak_factory_.GetWeakPtr(); // Issue a Flush() to all started data sources. bool flush_queued = false; for (size_t i = 0; i < num_data_sources; i++) { DataSourceInstanceID ds_id = data_source_ids[i]; auto it = data_sources_.find(ds_id); if (it == data_sources_.end() || !it->second->started) continue; pending_flushes_.emplace(flush_request_id, ds_id); flush_queued = true; auto flush_callback = [weak_this, flush_request_id, ds_id] { if (weak_this) weak_this->OnDataSourceFlushComplete(flush_request_id, ds_id); }; it->second->Flush(flush_request_id, flush_callback); } // If there is nothing to flush, ack immediately. if (!flush_queued) { endpoint_->NotifyFlushComplete(flush_request_id); return; } // Otherwise, post the timeout task. task_runner_->PostDelayedTask( [weak_this, flush_request_id] { if (weak_this) weak_this->OnFlushTimeout(flush_request_id); }, kFlushTimeoutMs); } void ProbesProducer::OnDataSourceFlushComplete(FlushRequestID flush_request_id, DataSourceInstanceID ds_id) { PERFETTO_DLOG("Flush %" PRIu64 " acked by data source %" PRIu64, flush_request_id, ds_id); auto range = pending_flushes_.equal_range(flush_request_id); for (auto it = range.first; it != range.second; it++) { if (it->second == ds_id) { pending_flushes_.erase(it); break; } } if (pending_flushes_.count(flush_request_id)) return; // Still waiting for other data sources to ack. PERFETTO_DLOG("All data sources acked to flush %" PRIu64, flush_request_id); endpoint_->NotifyFlushComplete(flush_request_id); } void ProbesProducer::OnFlushTimeout(FlushRequestID flush_request_id) { if (pending_flushes_.count(flush_request_id) == 0) return; // All acked. PERFETTO_ELOG("Flush(%" PRIu64 ") timed out", flush_request_id); pending_flushes_.erase(flush_request_id); endpoint_->NotifyFlushComplete(flush_request_id); } void ProbesProducer::ClearIncrementalState( const DataSourceInstanceID* data_source_ids, size_t num_data_sources) { for (size_t i = 0; i < num_data_sources; i++) { DataSourceInstanceID ds_id = data_source_ids[i]; auto it = data_sources_.find(ds_id); if (it == data_sources_.end() || !it->second->started) continue; it->second->ClearIncrementalState(); } } // This function is called by the FtraceController in batches, whenever it has // read one or more pages from one or more cpus and written that into the // userspace tracing buffer. If more than one ftrace data sources are active, // this call typically happens after writing for all session has been handled. void ProbesProducer::OnFtraceDataWrittenIntoDataSourceBuffers() { TracingSessionID last_session_id = 0; FtraceMetadata* metadata = nullptr; InodeFileDataSource* inode_data_source = nullptr; ProcessStatsDataSource* ps_data_source = nullptr; // unordered_multimap guarantees that entries with the same key are contiguous // in the iteration. for (auto it = session_data_sources_.begin(); /* check below*/; it++) { // If this is the last iteration or the session id has changed, // dispatch the metadata update to the linked data sources, if any. if (it == session_data_sources_.end() || it->first != last_session_id) { bool has_inodes = metadata && !metadata->inode_and_device.empty(); bool has_pids = metadata && !metadata->pids.empty(); bool has_rename_pids = metadata && !metadata->rename_pids.empty(); if (has_inodes && inode_data_source) inode_data_source->OnInodes(metadata->inode_and_device); // Ordering the rename pids before the seen pids is important so that any // renamed processes get scraped in the OnPids call. if (has_rename_pids && ps_data_source) ps_data_source->OnRenamePids(metadata->rename_pids); if (has_pids && ps_data_source) ps_data_source->OnPids(metadata->pids); if (metadata) metadata->Clear(); metadata = nullptr; inode_data_source = nullptr; ps_data_source = nullptr; if (it == session_data_sources_.end()) break; last_session_id = it->first; } ProbesDataSource* ds = it->second; if (!ds->started) continue; switch (ds->type_id) { case FtraceDataSource::kTypeId: metadata = static_cast(ds)->mutable_metadata(); break; case InodeFileDataSource::kTypeId: inode_data_source = static_cast(ds); break; case ProcessStatsDataSource::kTypeId: { // A trace session might have declared more than one ps data source. // In those cases we often use one for a full dump on startup ( // targeting a dedicated buffer) and another one for on-demand dumps // targeting the main buffer. // Only use the one that has on-demand dumps enabled, if any. auto ps = static_cast(ds); if (ps->on_demand_dumps_enabled()) ps_data_source = ps; break; } case SysStatsDataSource::kTypeId: case AndroidLogDataSource::kTypeId: case PackagesListDataSource::kTypeId: break; default: PERFETTO_DFATAL("Invalid data source."); } // switch (type_id) } // for (session_data_sources_) } void ProbesProducer::ConnectWithRetries(const char* socket_name, base::TaskRunner* task_runner) { PERFETTO_DCHECK(state_ == kNotStarted); state_ = kNotConnected; ResetConnectionBackoff(); socket_name_ = socket_name; task_runner_ = task_runner; Connect(); } void ProbesProducer::Connect() { PERFETTO_DCHECK(state_ == kNotConnected); state_ = kConnecting; endpoint_ = ProducerIPCClient::Connect( socket_name_, this, "perfetto.traced_probes", task_runner_); } void ProbesProducer::IncreaseConnectionBackoff() { connection_backoff_ms_ *= 2; if (connection_backoff_ms_ > kMaxConnectionBackoffMs) connection_backoff_ms_ = kMaxConnectionBackoffMs; } void ProbesProducer::ResetConnectionBackoff() { connection_backoff_ms_ = kInitialConnectionBackoffMs; } } // namespace perfetto