1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_PROFILING_MEMORY_HEAPPROFD_PRODUCER_H_
18 #define SRC_PROFILING_MEMORY_HEAPPROFD_PRODUCER_H_
19 
#include <stddef.h>
#include <stdint.h>

#include <array>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "perfetto/base/optional.h"
#include "perfetto/base/task_runner.h"
#include "perfetto/base/unix_socket.h"
#include "perfetto/base/unix_task_runner.h"

#include "perfetto/tracing/core/basic_types.h"
#include "perfetto/tracing/core/data_source_config.h"
#include "perfetto/tracing/core/producer.h"
#include "perfetto/tracing/core/trace_writer.h"
#include "perfetto/tracing/core/tracing_service.h"

#include "src/profiling/memory/bookkeeping.h"
#include "src/profiling/memory/proc_utils.h"
#include "src/profiling/memory/system_property.h"
#include "src/profiling/memory/unwinding.h"
38 
39 namespace perfetto {
40 namespace profiling {
41 
// Identifies a process by its pid and its command line.
//
// Kept as a plain aggregate (no constructors, no default member
// initializers): HeapprofdProducer brace-initializes it positionally as
// {pid, cmdline}, so the member order must not change.
struct Process {
  pid_t pid;
  std::string cmdline;
};
46 
// Histogram of uint64_t samples with a fixed number (kBuckets) of counters.
// The mapping from a sample to its counter lives in GetBucket(), which is
// defined out of line (presumably logarithmic spacing, going by the class
// name - confirm in the .cc).
class LogHistogram {
 public:
  // Number of counters held by the histogram.
  static constexpr size_t kBuckets = 20;
  // Defined out of line.
  static const uint64_t kMaxBucket;

  // Records one sample: bumps the counter of the bucket that GetBucket()
  // selects for |value|.
  void Add(uint64_t value) {
    const size_t bucket_index = GetBucket(value);
    ++values_[bucket_index];
  }

  // Returns the histogram contents as a list of pairs. Defined out of line.
  // NOTE(review): presumably (bucket upper bound, count) - confirm in the .cc.
  std::vector<std::pair<uint64_t, uint64_t>> GetData();

 private:
  // Maps a sample to an index into |values_|. Defined out of line; Add()
  // relies on the result being < kBuckets.
  size_t GetBucket(uint64_t value);

  // Per-bucket sample counts, all zero on construction.
  std::array<uint64_t, kBuckets> values_ = {};
};
60 
61 // TODO(rsavitski): central daemon can do less work if it knows that the global
62 // operating mode is fork-based, as it then will not be interacting with the
63 // clients. This can be implemented as an additional mode here.
enum class HeapprofdMode {
  // Producer instantiated by the system heapprofd daemon.
  kCentral,
  // Producer instantiated by a private heapprofd binary that the profiled
  // process fork-execs (also referred to as fork mode).
  kChild,
};
65 
66 // Heap profiling producer. Can be instantiated in two modes, central and
67 // child (also referred to as fork mode).
68 //
69 // The central mode producer is instantiated by the system heapprofd daemon. Its
70 // primary responsibility is activating profiling (via system properties and
71 // signals) in targets identified by profiling configs. On debug platform
72 // builds, the central producer can also handle the out-of-process unwinding &
73 // writing of the profiles for all client processes.
74 //
75 // An alternative model is where the central heapprofd triggers the profiling in
76 // the target process, but the latter fork-execs a private heapprofd binary to
77 // handle unwinding only for that process. The forked heapprofd instantiates
78 // this producer in the "child" mode. In this scenario, the profiled process
79 // never talks to the system daemon.
80 //
81 // TODO(fmayer||rsavitski): cover interesting invariants/structure of the
82 // implementation (e.g. number of data sources in child mode), including
83 // threading structure.
84 class HeapprofdProducer : public Producer, public UnwindingWorker::Delegate {
85  public:
86   friend class SocketDelegate;
87 
88   // TODO(fmayer): Split into two delegates for the listening socket in kCentral
89   // and for the per-client sockets to make this easier to understand?
90   // Alternatively, find a better name for this.
91   class SocketDelegate : public base::UnixSocket::EventListener {
92    public:
SocketDelegate(HeapprofdProducer * producer)93     SocketDelegate(HeapprofdProducer* producer) : producer_(producer) {}
94 
95     void OnDisconnect(base::UnixSocket* self) override;
96     void OnNewIncomingConnection(
97         base::UnixSocket* self,
98         std::unique_ptr<base::UnixSocket> new_connection) override;
99     void OnDataAvailable(base::UnixSocket* self) override;
100 
101    private:
102     HeapprofdProducer* producer_;
103   };
104 
105   HeapprofdProducer(HeapprofdMode mode, base::TaskRunner* task_runner);
106   ~HeapprofdProducer() override;
107 
108   // Producer Impl:
109   void OnConnect() override;
110   void OnDisconnect() override;
111   void SetupDataSource(DataSourceInstanceID, const DataSourceConfig&) override;
112   void StartDataSource(DataSourceInstanceID, const DataSourceConfig&) override;
113   void StopDataSource(DataSourceInstanceID) override;
114   void OnTracingSetup() override;
115   void Flush(FlushRequestID,
116              const DataSourceInstanceID* data_source_ids,
117              size_t num_data_sources) override;
ClearIncrementalState(const DataSourceInstanceID *,size_t)118   void ClearIncrementalState(const DataSourceInstanceID* /*data_source_ids*/,
119                              size_t /*num_data_sources*/) override {}
120 
121   // TODO(fmayer): Refactor once/if we have generic reconnect logic.
122   void ConnectWithRetries(const char* socket_name);
123   void DumpAll();
124 
125   // UnwindingWorker::Delegate impl:
126   void PostAllocRecord(AllocRecord) override;
127   void PostFreeRecord(FreeRecord) override;
128   void PostSocketDisconnected(DataSourceInstanceID,
129                               pid_t,
130                               SharedRingBuffer::Stats) override;
131 
132   void HandleAllocRecord(AllocRecord);
133   void HandleFreeRecord(FreeRecord);
134   void HandleSocketDisconnected(DataSourceInstanceID,
135                                 pid_t,
136                                 SharedRingBuffer::Stats);
137 
138   // Valid only if mode_ == kChild.
139   void SetTargetProcess(pid_t target_pid,
140                         std::string target_cmdline,
141                         base::ScopedFile inherited_socket);
142   // Valid only if mode_ == kChild. Kicks off a periodic check that the child
143   // heapprofd is actively working on a data source (which should correspond to
144   // the target process). The first check is delayed to let the freshly spawned
145   // producer get the data sources from the tracing service (i.e. traced).
146   void ScheduleActiveDataSourceWatchdog();
147 
148   // Exposed for testing.
149   void SetProducerEndpoint(
150       std::unique_ptr<TracingService::ProducerEndpoint> endpoint);
151 
152  private:
153   // State of the connection to tracing service (traced).
154   enum State {
155     kNotStarted = 0,
156     kNotConnected,
157     kConnecting,
158     kConnected,
159   };
160 
161   struct ProcessState {
ProcessStateProcessState162     ProcessState(GlobalCallstackTrie* callsites) : heap_tracker(callsites) {}
163     bool disconnected = false;
164     bool buffer_overran = false;
165     bool buffer_corrupted = false;
166 
167     uint64_t heap_samples = 0;
168     uint64_t map_reparses = 0;
169     uint64_t unwinding_errors = 0;
170 
171     uint64_t total_unwinding_time_us = 0;
172     LogHistogram unwinding_time_us;
173     HeapTracker heap_tracker;
174   };
175 
176   struct DataSource {
177     DataSourceInstanceID id;
178     std::unique_ptr<TraceWriter> trace_writer;
179     HeapprofdConfig config;
180     ClientConfiguration client_configuration;
181     std::vector<SystemProperties::Handle> properties;
182     std::set<pid_t> signaled_pids;
183     std::set<pid_t> rejected_pids;
184     std::map<pid_t, ProcessState> process_states;
185     std::vector<std::string> normalized_cmdlines;
186     uint64_t next_index_ = 0;
187   };
188 
189   struct PendingProcess {
190     std::unique_ptr<base::UnixSocket> sock;
191     DataSourceInstanceID data_source_instance_id;
192     SharedRingBuffer shmem;
193   };
194 
195   void HandleClientConnection(std::unique_ptr<base::UnixSocket> new_connection,
196                               Process process);
197 
198   void ConnectService();
199   void Restart();
200   void ResetConnectionBackoff();
201   void IncreaseConnectionBackoff();
202 
203   void FinishDataSourceFlush(FlushRequestID flush_id);
204   bool Dump(DataSourceInstanceID id,
205             FlushRequestID flush_id,
206             bool has_flush_id);
207   void DoContinuousDump(DataSourceInstanceID id, uint32_t dump_interval);
208 
209   UnwindingWorker& UnwinderForPID(pid_t);
210   bool IsPidProfiled(pid_t);
211   DataSource* GetDataSourceForProcess(const Process& proc);
212   void RecordOtherSourcesAsRejected(DataSource* active_ds, const Process& proc);
213 
214   // Specific to mode_ == kCentral
215   std::unique_ptr<base::UnixSocket> MakeListeningSocket();
216 
217   // Specific to mode_ == kChild
218   void TerminateProcess(int exit_status);
219   // Specific to mode_ == kChild
220   void ActiveDataSourceWatchdogCheck();
221   // Adopts the (connected) sockets inherited from the target process, invoking
222   // the on-connection callback.
223   // Specific to mode_ == kChild
224   void AdoptTargetProcessSocket();
225 
226   // Class state:
227 
228   // Task runner is owned by the main thread.
229   base::TaskRunner* const task_runner_;
230   const HeapprofdMode mode_;
231 
232   // State of connection to the tracing service.
233   State state_ = kNotStarted;
234   uint32_t connection_backoff_ms_ = 0;
235   const char* producer_sock_name_ = nullptr;
236 
237   // Client processes that have connected, but with which we have not yet
238   // finished the handshake.
239   std::map<pid_t, PendingProcess> pending_processes_;
240 
241   // Must outlive data_sources_ - owns at least the shared memory referenced by
242   // TraceWriters.
243   std::unique_ptr<TracingService::ProducerEndpoint> endpoint_;
244 
245   // Must outlive data_sources_ - HeapTracker references the trie.
246   GlobalCallstackTrie callsites_;
247 
248   // Must outlive data_sources_ - DataSource can hold
249   // SystemProperties::Handle-s.
250   // Specific to mode_ == kCentral
251   SystemProperties properties_;
252 
253   std::map<FlushRequestID, size_t> flushes_in_progress_;
254   std::map<DataSourceInstanceID, DataSource> data_sources_;
255   std::vector<UnwindingWorker> unwinding_workers_;
256 
257   // Specific to mode_ == kCentral
258   std::unique_ptr<base::UnixSocket> listening_socket_;
259 
260   // Specific to mode_ == kChild
261   Process target_process_{base::kInvalidPid, ""};
262   // This is a valid FD only between SetTargetProcess and
263   // AdoptTargetProcessSocket.
264   // Specific to mode_ == kChild
265   base::ScopedFile inherited_fd_;
266 
267   SocketDelegate socket_delegate_;
268 
269   base::WeakPtrFactory<HeapprofdProducer> weak_factory_;  // Keep last.
270 };
271 
272 }  // namespace profiling
273 }  // namespace perfetto
274 
275 #endif  // SRC_PROFILING_MEMORY_HEAPPROFD_PRODUCER_H_
276