/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/profiling/memory/unwinding.h"

#include <sys/types.h>
#include <unistd.h>

#include <unwindstack/MachineArm.h>
#include <unwindstack/MachineArm64.h>
#include <unwindstack/MachineMips.h>
#include <unwindstack/MachineMips64.h>
#include <unwindstack/MachineX86.h>
#include <unwindstack/MachineX86_64.h>
#include <unwindstack/Maps.h>
#include <unwindstack/Memory.h>
#include <unwindstack/Regs.h>
#include <unwindstack/RegsArm.h>
#include <unwindstack/RegsArm64.h>
#include <unwindstack/RegsMips.h>
#include <unwindstack/RegsMips64.h>
#include <unwindstack/RegsX86.h>
#include <unwindstack/RegsX86_64.h>
#include <unwindstack/Unwinder.h>
#include <unwindstack/UserArm.h>
#include <unwindstack/UserArm64.h>
#include <unwindstack/UserMips.h>
#include <unwindstack/UserMips64.h>
#include <unwindstack/UserX86.h>
#include <unwindstack/UserX86_64.h>

#include <procinfo/process_map.h>

#include "perfetto/base/logging.h"
#include "perfetto/base/task_runner.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/string_utils.h"
#include "perfetto/ext/base/thread_task_runner.h"

#include "src/profiling/memory/unwound_messages.h"
#include "src/profiling/memory/wire_protocol.h"
namespace perfetto {
namespace profiling {
namespace {

constexpr base::TimeMillis kMapsReparseInterval{500};
constexpr uint32_t kRetryDelayMs = 100;

constexpr size_t kMaxFrames = 500;

// We assume an average of ~300us per unwind. If we handle up to 1000 unwinds,
// this makes sure other tasks get to run at least every 300ms if the
// unwinding saturates this thread.
constexpr size_t kUnwindBatchSize = 1000;
constexpr size_t kRecordBatchSize = 1024;
constexpr size_t kMaxAllocRecordArenaSize = 2 * kRecordBatchSize;

#pragma GCC diagnostic push
// We do not care about deterministic destructor order.
#pragma GCC diagnostic ignored "-Wglobal-constructors"
#pragma GCC diagnostic ignored "-Wexit-time-destructors"
static std::vector<std::string> kSkipMaps{"heapprofd_client.so",
                                          "heapprofd_client_api.so"};
#pragma GCC diagnostic pop

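// Returns the size in bytes of the raw register data held by |regs|, which
// depends on whether the target process is 32-bit or 64-bit.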
size_t GetRegsSize(unwindstack::Regs* regs) {
  if (regs->Is32Bit())
    return sizeof(uint32_t) * regs->total_regs();
  return sizeof(uint64_t) * regs->total_regs();
}

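// Copies the register data that was sent over the wire protocol into |regs|.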
void ReadFromRawData(unwindstack::Regs* regs, void* raw_data) {
  memcpy(regs->RawData(), raw_data, GetRegsSize(regs));
}

}  // namespace

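// Creates the arch-specific unwindstack::Regs object for |arch| and populates
// it from |raw_data|. Returns nullptr for ARCH_UNKNOWN.
//
// A usage sketch, mirroring DoUnwind below (|msg| being a parsed WireMessage):
//   std::unique_ptr<unwindstack::Regs> regs = CreateRegsFromRawData(
//       msg.alloc_header->arch, msg.alloc_header->register_data);
//   if (!regs) { /* unknown architecture, cannot unwind */ }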
std::unique_ptr<unwindstack::Regs> CreateRegsFromRawData(
    unwindstack::ArchEnum arch,
    void* raw_data) {
  std::unique_ptr<unwindstack::Regs> ret;
  switch (arch) {
    case unwindstack::ARCH_X86:
      ret.reset(new unwindstack::RegsX86());
      break;
    case unwindstack::ARCH_X86_64:
      ret.reset(new unwindstack::RegsX86_64());
      break;
    case unwindstack::ARCH_ARM:
      ret.reset(new unwindstack::RegsArm());
      break;
    case unwindstack::ARCH_ARM64:
      ret.reset(new unwindstack::RegsArm64());
      break;
    case unwindstack::ARCH_MIPS:
      ret.reset(new unwindstack::RegsMips());
      break;
    case unwindstack::ARCH_MIPS64:
      ret.reset(new unwindstack::RegsMips64());
      break;
    case unwindstack::ARCH_UNKNOWN:
      break;
  }
  if (ret)
    ReadFromRawData(ret.get(), raw_data);
  return ret;
}

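// Unwinds the callstack described by |msg| (registers plus the copied-out
// stack) against the target's memory maps in |metadata|, writing frames and
// build ids into |out|. If the unwind fails because the maps changed
// (ERROR_INVALID_MAP, or a dex_pc outside any known map), the maps are
// reparsed (rate-limited by kMapsReparseInterval) and the unwind is retried
// once. Returns false only if the registers could not be reconstructed.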
bool DoUnwind(WireMessage* msg, UnwindingMetadata* metadata, AllocRecord* out) {
  AllocMetadata* alloc_metadata = msg->alloc_header;
  std::unique_ptr<unwindstack::Regs> regs(CreateRegsFromRawData(
      alloc_metadata->arch, alloc_metadata->register_data));
  if (regs == nullptr) {
    PERFETTO_DLOG("Unable to construct unwindstack::Regs");
    unwindstack::FrameData frame_data{};
    frame_data.function_name = "ERROR READING REGISTERS";
    frame_data.map_name = "ERROR";

    out->frames.clear();
    out->build_ids.clear();
    out->frames.emplace_back(std::move(frame_data));
    out->build_ids.emplace_back("");
    out->error = true;
    return false;
  }
  uint8_t* stack = reinterpret_cast<uint8_t*>(msg->payload);
  std::shared_ptr<unwindstack::Memory> mems =
      std::make_shared<StackOverlayMemory>(metadata->fd_mem,
                                           alloc_metadata->stack_pointer, stack,
                                           msg->payload_size);

  unwindstack::Unwinder unwinder(kMaxFrames, &metadata->fd_maps, regs.get(),
                                 mems);
#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
  unwinder.SetJitDebug(metadata->GetJitDebug(regs->Arch()));
  unwinder.SetDexFiles(metadata->GetDexFiles(regs->Arch()));
#endif
  // Suppress the incorrect "variable may be uninitialized" error for the if
  // condition after this loop: error_code = unwinder.LastErrorCode() runs at
  // least once.
  unwindstack::ErrorCode error_code = unwindstack::ERROR_NONE;
  for (int attempt = 0; attempt < 2; ++attempt) {
    if (attempt > 0) {
      if (metadata->last_maps_reparse_time + kMapsReparseInterval >
          base::GetWallTimeMs()) {
        PERFETTO_DLOG("Skipping reparse due to rate limit.");
        break;
      }
      PERFETTO_DLOG("Reparsing maps");
      metadata->ReparseMaps();
      metadata->last_maps_reparse_time = base::GetWallTimeMs();
      // Regs got invalidated by libunwindstack's speculative jump.
      // Reset.
      ReadFromRawData(regs.get(), alloc_metadata->register_data);
      out->reparsed_map = true;
#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
      unwinder.SetJitDebug(metadata->GetJitDebug(regs->Arch()));
      unwinder.SetDexFiles(metadata->GetDexFiles(regs->Arch()));
#endif
    }
    out->frames.swap(unwinder.frames());  // Provide the unwinder buffer to use.
    unwinder.Unwind(&kSkipMaps, /*map_suffixes_to_ignore=*/nullptr);
    out->frames.swap(unwinder.frames());  // Take the buffer back.
    error_code = unwinder.LastErrorCode();
    if (error_code != unwindstack::ERROR_INVALID_MAP &&
        (unwinder.warnings() & unwindstack::WARNING_DEX_PC_NOT_IN_MAP) == 0) {
      break;
    }
  }
  out->build_ids.resize(out->frames.size());
  for (size_t i = 0; i < out->frames.size(); ++i) {
    out->build_ids[i] = metadata->GetBuildId(out->frames[i]);
  }

  if (error_code != unwindstack::ERROR_NONE) {
    PERFETTO_DLOG("Unwinding error %" PRIu8, error_code);
    unwindstack::FrameData frame_data{};
    frame_data.function_name =
        "ERROR " + StringifyLibUnwindstackError(error_code);
    frame_data.map_name = "ERROR";

    out->frames.emplace_back(std::move(frame_data));
    out->build_ids.emplace_back("");
    out->error = true;
  }
  return true;
}

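// Called when a client socket disconnects. Drains whatever is still pending
// in the client's shared memory buffer, flushes buffered free records,
// snapshots the ring buffer stats and posts the disconnect to the delegate.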
void UnwindingWorker::OnDisconnect(base::UnixSocket* self) {
  pid_t peer_pid = self->peer_pid_linux();
  auto it = client_data_.find(peer_pid);
  if (it == client_data_.end()) {
    PERFETTO_DFATAL_OR_ELOG("Disconnected unexpected socket.");
    return;
  }

  ClientData& client_data = it->second;
  ReadAndUnwindBatch(&client_data);
  SharedRingBuffer& shmem = client_data.shmem;

  if (!client_data.free_records.empty()) {
    delegate_->PostFreeRecord(this, std::move(client_data.free_records));
    client_data.free_records.clear();
  }

  SharedRingBuffer::Stats stats = {};
  {
    auto lock = shmem.AcquireLock(ScopedSpinlock::Mode::Try);
    if (lock.locked())
      stats = shmem.GetStats(lock);
    else
      PERFETTO_ELOG("Failed to lock shmem to get stats.");
  }
  DataSourceInstanceID ds_id = client_data.data_source_instance_id;

  client_data_.erase(it);
  if (client_data_.empty()) {
    // We got rid of the last client. Flush and destruct AllocRecords in the
    // arena. Disable the arena (it will not accept returned borrowed records)
    // in case there are pending AllocRecords on the main thread.
    alloc_record_arena_.Disable();
  }
  // The erase invalidates the |self| pointer.
  self = nullptr;
  delegate_->PostSocketDisconnected(this, ds_id, peer_pid, stats);
}

void UnwindingWorker::OnDataAvailable(base::UnixSocket* self) {
  // Drain buffer to clear the notification.
  char recv_buf[kUnwindBatchSize];
  self->Receive(recv_buf, sizeof(recv_buf));
  BatchUnwindJob(self->peer_pid_linux());
}

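// Reads and unwinds up to kUnwindBatchSize records from the client's shared
// memory buffer. The result tells BatchUnwindJob how to continue:
//  * kHasMore:  batch limit hit or a maps reparse interrupted the batch;
//               repost immediately.
//  * kReadSome: the buffer ran dry mid-batch; retry after kRetryDelayMs.
//  * kReadNone: nothing was read; pause until the client notifies us.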
UnwindingWorker::ReadAndUnwindBatchResult UnwindingWorker::ReadAndUnwindBatch(
    ClientData* client_data) {
  SharedRingBuffer& shmem = client_data->shmem;
  SharedRingBuffer::Buffer buf;

  size_t i;
  for (i = 0; i < kUnwindBatchSize; ++i) {
    uint64_t reparses_before = client_data->metadata.reparses;
    buf = shmem.BeginRead();
    if (!buf)
      break;
    HandleBuffer(this, &alloc_record_arena_, buf, client_data,
                 client_data->sock->peer_pid_linux(), delegate_);
    shmem.EndRead(std::move(buf));
    // Reparsing takes time, so process the rest in a new batch to avoid
    // timing out.
    if (reparses_before < client_data->metadata.reparses) {
      return ReadAndUnwindBatchResult::kHasMore;
    }
  }

  if (i == kUnwindBatchSize) {
    return ReadAndUnwindBatchResult::kHasMore;
  } else if (i > 0) {
    return ReadAndUnwindBatchResult::kReadSome;
  } else {
    return ReadAndUnwindBatchResult::kReadNone;
  }
}

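// Drains one batch of records for |peer_pid|, then either reposts itself or
// pauses the reader until the client signals again (see the PERFETTO_CHECK
// at the bottom).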
void UnwindingWorker::BatchUnwindJob(pid_t peer_pid) {
  auto it = client_data_.find(peer_pid);
  if (it == client_data_.end()) {
    // This can happen if the client disconnected before the buffer was fully
    // handled.
    PERFETTO_DLOG("Unexpected data.");
    return;
  }

  bool job_reposted = false;
  bool reader_paused = false;
  ClientData& client_data = it->second;
  switch (ReadAndUnwindBatch(&client_data)) {
    case ReadAndUnwindBatchResult::kHasMore:
      thread_task_runner_.get()->PostTask(
          [this, peer_pid] { BatchUnwindJob(peer_pid); });
      job_reposted = true;
      break;
    case ReadAndUnwindBatchResult::kReadSome:
      thread_task_runner_.get()->PostDelayedTask(
          [this, peer_pid] { BatchUnwindJob(peer_pid); }, kRetryDelayMs);
      job_reposted = true;
      break;
    case ReadAndUnwindBatchResult::kReadNone:
      client_data.shmem.SetReaderPaused();
      reader_paused = true;
      break;
  }

  // We need to either repost the job, or set the reader paused bit. By
  // setting that bit, we inform the client that we want to be notified when
  // new data is written to the shared memory buffer.
  // If we do neither of these things, we will not read from the shared memory
  // buffer again.
  PERFETTO_CHECK(job_reposted || reader_paused);
}

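// Decodes one wire message out of |buf| and dispatches it: Malloc records
// are unwound (unless raw allocations are being streamed) and posted
// individually, Free records are batched up to kRecordBatchSize before being
// posted, and HeapName records are posted immediately.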
// static
void UnwindingWorker::HandleBuffer(UnwindingWorker* self,
                                   AllocRecordArena* alloc_record_arena,
                                   const SharedRingBuffer::Buffer& buf,
                                   ClientData* client_data,
                                   pid_t peer_pid,
                                   Delegate* delegate) {
  UnwindingMetadata* unwinding_metadata = &client_data->metadata;
  DataSourceInstanceID data_source_instance_id =
      client_data->data_source_instance_id;
  WireMessage msg;
  // TODO(fmayer): standardise on char* or uint8_t*.
  // char* has stronger guarantees regarding aliasing.
  // see https://timsong-cpp.github.io/cppwp/n3337/basic.lval#10.8
  if (!ReceiveWireMessage(reinterpret_cast<char*>(buf.data), buf.size, &msg)) {
    PERFETTO_DFATAL_OR_ELOG("Failed to receive wire message.");
    return;
  }

  if (msg.record_type == RecordType::Malloc) {
    std::unique_ptr<AllocRecord> rec = alloc_record_arena->BorrowAllocRecord();
    rec->alloc_metadata = *msg.alloc_header;
    rec->pid = peer_pid;
    rec->data_source_instance_id = data_source_instance_id;
    auto start_time_us = base::GetWallTimeNs() / 1000;
    if (!client_data->stream_allocations)
      DoUnwind(&msg, unwinding_metadata, rec.get());
    rec->unwinding_time_us = static_cast<uint64_t>(
        ((base::GetWallTimeNs() / 1000) - start_time_us).count());
    delegate->PostAllocRecord(self, std::move(rec));
  } else if (msg.record_type == RecordType::Free) {
    FreeRecord rec;
    rec.pid = peer_pid;
    rec.data_source_instance_id = data_source_instance_id;
    // We need to copy this, so we can return the memory to the shmem buffer.
    memcpy(&rec.entry, msg.free_header, sizeof(*msg.free_header));
    client_data->free_records.emplace_back(std::move(rec));
    if (client_data->free_records.size() == kRecordBatchSize) {
      delegate->PostFreeRecord(self, std::move(client_data->free_records));
      client_data->free_records.clear();
      client_data->free_records.reserve(kRecordBatchSize);
    }
  } else if (msg.record_type == RecordType::HeapName) {
    HeapNameRecord rec;
    rec.pid = peer_pid;
    rec.data_source_instance_id = data_source_instance_id;
    memcpy(&rec.entry, msg.heap_name_header, sizeof(*msg.heap_name_header));
    rec.entry.heap_name[sizeof(rec.entry.heap_name) - 1] = '\0';
    delegate->PostHeapNameRecord(self, std::move(rec));
  } else {
    PERFETTO_DFATAL_OR_ELOG("Invalid record type.");
  }
}

void UnwindingWorker::PostHandoffSocket(HandoffData handoff_data) {
  // Even with C++14, this cannot be moved, as std::function has to be
  // copyable, which HandoffData is not.
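  // A hypothetical illustration of the constraint: a lambda with a move-only
  // capture, e.g.
  //   [data = std::move(handoff_data)] { HandleHandoffSocket(std::move(data)); }
  // cannot be stored in the std::function<void()> that PostTask accepts,
  // because std::function requires a copyable callable. Hence the manual
  // new/delete dance below.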
  HandoffData* raw_data = new HandoffData(std::move(handoff_data));
  // We do not need to use a WeakPtr here because the task runner will not
  // outlive its UnwindingWorker.
  thread_task_runner_.get()->PostTask([this, raw_data] {
    HandoffData data = std::move(*raw_data);
    delete raw_data;
    HandleHandoffSocket(std::move(data));
  });
}

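// Runs on the unwinding thread: adopts the client socket handed over by the
// main thread and sets up the per-client state (unwinding metadata, shared
// memory buffer, free record batch) used by subsequent unwinding jobs.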
void UnwindingWorker::HandleHandoffSocket(HandoffData handoff_data) {
  auto sock = base::UnixSocket::AdoptConnected(
      handoff_data.sock.ReleaseFd(), this, this->thread_task_runner_.get(),
      base::SockFamily::kUnix, base::SockType::kStream);
  pid_t peer_pid = sock->peer_pid_linux();

  UnwindingMetadata metadata(std::move(handoff_data.maps_fd),
                             std::move(handoff_data.mem_fd));
  ClientData client_data{
      handoff_data.data_source_instance_id,
      std::move(sock),
      std::move(metadata),
      std::move(handoff_data.shmem),
      std::move(handoff_data.client_config),
      handoff_data.stream_allocations,
      {},
  };
  client_data.free_records.reserve(kRecordBatchSize);
  client_data.shmem.SetReaderPaused();
  client_data_.emplace(peer_pid, std::move(client_data));
  alloc_record_arena_.Enable();
}

void UnwindingWorker::PostDisconnectSocket(pid_t pid) {
  // We do not need to use a WeakPtr here because the task runner will not
  // outlive its UnwindingWorker.
  thread_task_runner_.get()->PostTask(
      [this, pid] { HandleDisconnectSocket(pid); });
}

void UnwindingWorker::HandleDisconnectSocket(pid_t pid) {
  auto it = client_data_.find(pid);
  if (it == client_data_.end()) {
    // This is expected if the client voluntarily disconnects before the
    // profiling session ends. In that case, there is a race between the main
    // thread learning about the disconnect and it calling back here.
    return;
  }
  ClientData& client_data = it->second;
  // Shut down the socket, which triggers the OnDisconnect handler.
  client_data.shmem.SetShuttingDown();
  client_data.sock->Shutdown(/* notify= */ true);
}

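// AllocRecordArena recycles AllocRecord objects between the unwinding thread
// and the main thread, so that a fresh heap allocation is not needed for
// every sampled allocation.
//
// A minimal usage sketch, assuming |arena| (hypothetical) is shared between
// both threads:
//   std::unique_ptr<AllocRecord> rec = arena.BorrowAllocRecord();
//   // ... fill |rec| and hand it to the main thread ...
//   arena.ReturnAllocRecord(std::move(rec));  // recycled unless disabled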
std::unique_ptr<AllocRecord> AllocRecordArena::BorrowAllocRecord() {
  std::lock_guard<std::mutex> l(*alloc_records_mutex_);
  if (!alloc_records_.empty()) {
    std::unique_ptr<AllocRecord> result = std::move(alloc_records_.back());
    alloc_records_.pop_back();
    return result;
  }
  return std::unique_ptr<AllocRecord>(new AllocRecord());
}

void AllocRecordArena::ReturnAllocRecord(std::unique_ptr<AllocRecord> record) {
  std::lock_guard<std::mutex> l(*alloc_records_mutex_);
  if (enabled_ && record && alloc_records_.size() < kMaxAllocRecordArenaSize)
    alloc_records_.emplace_back(std::move(record));
}

void AllocRecordArena::Disable() {
  std::lock_guard<std::mutex> l(*alloc_records_mutex_);
  alloc_records_.clear();
  enabled_ = false;
}

void AllocRecordArena::Enable() {
  std::lock_guard<std::mutex> l(*alloc_records_mutex_);
  enabled_ = true;
}

UnwindingWorker::Delegate::~Delegate() = default;

}  // namespace profiling
}  // namespace perfetto