/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "perfetto_hprof"

#include "perfetto_hprof.h"

#include <android-base/logging.h>
#include <base/fast_exit.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sched.h>
#include <signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <thread>
#include <time.h>

#include <limits>
#include <optional>
#include <type_traits>

#include "gc/heap-visit-objects-inl.h"
#include "gc/heap.h"
#include "gc/scoped_gc_critical_section.h"
#include "mirror/object-refvisitor-inl.h"
#include "nativehelper/scoped_local_ref.h"
#include "perfetto/profiling/parse_smaps.h"
#include "perfetto/trace/interned_data/interned_data.pbzero.h"
#include "perfetto/trace/profiling/heap_graph.pbzero.h"
#include "perfetto/trace/profiling/profile_common.pbzero.h"
#include "perfetto/trace/profiling/smaps.pbzero.h"
#include "perfetto/config/profiling/java_hprof_config.pbzero.h"
#include "perfetto/protozero/packed_repeated_fields.h"
#include "perfetto/tracing.h"
#include "runtime-inl.h"
#include "runtime_callbacks.h"
#include "scoped_thread_state_change-inl.h"
#include "thread_list.h"
#include "well_known_classes.h"
#include "dex/descriptors_names.h"

// There are three threads involved in this:
// * listener thread: this is idle in the background when this plugin gets loaded, and waits
//   for data on g_signal_pipe_fds.
// * signal thread: an arbitrary thread that handles the signal and writes data to
//   g_signal_pipe_fds.
// * perfetto producer thread: once the signal is received, the app forks. In the newly forked
//   child, the Perfetto Client API spawns a thread to communicate with traced.
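//
// Process-wise, the dump runs in a separate process tree (sketch; see DumpPerfetto below):
//
//   app process --fork()--> child --daemon()--> grand-child
//     parent waitpid()s       exits right       registers the data source and
//     for the child           away              writes the heap graph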

namespace perfetto_hprof {

constexpr int kJavaHeapprofdSignal = __SIGRTMIN + 6;
constexpr time_t kWatchdogTimeoutSec = 120;
// This needs to be lower than the maximum acceptable chunk size, because this
// is checked *before* writing another submessage. We conservatively assume
// submessages can be up to 100k here for a 500k chunk size.
// DropBox has a 500k chunk limit, and each chunk needs to parse as a proto.
constexpr uint32_t kPacketSizeThreshold = 400000;
constexpr char kByte[1] = {'x'};
static art::Mutex& GetStateMutex() {
  static art::Mutex state_mutex("perfetto_hprof_state_mutex", art::LockLevel::kGenericBottomLock);
  return state_mutex;
}

static art::ConditionVariable& GetStateCV() {
  static art::ConditionVariable state_cv("perfetto_hprof_state_cv", GetStateMutex());
  return state_cv;
}

static int requested_tracing_session_id = 0;
static State g_state = State::kUninitialized;

// Pipe to signal from the signal handler into a worker thread that handles the
// dump requests.
int g_signal_pipe_fds[2];
static struct sigaction g_orig_act = {};

template <typename T>
uint64_t FindOrAppend(std::map<T, uint64_t>* m, const T& s) {
  auto it = m->find(s);
  if (it == m->end()) {
    std::tie(it, std::ignore) = m->emplace(s, m->size());
  }
  return it->second;
}
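
// Illustrative behavior of FindOrAppend: on an empty map, FindOrAppend(&m, "a") returns 0
// and FindOrAppend(&m, "b") returns 1; a repeated FindOrAppend(&m, "a") returns 0 again.
// The interning maps in DumpPerfetto are pre-seeded with {"", 0} so that ID 0 keeps its
// proto default meaning (the empty string).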

void ArmWatchdogOrDie() {
  timer_t timerid{};
  struct sigevent sev {};
  sev.sigev_notify = SIGEV_SIGNAL;
  sev.sigev_signo = SIGKILL;

  if (timer_create(CLOCK_MONOTONIC, &sev, &timerid) == -1) {
    // This only gets called in the child, so we can fatal without impacting
    // the app.
    PLOG(FATAL) << "failed to create watchdog timer";
  }

  struct itimerspec its {};
  its.it_value.tv_sec = kWatchdogTimeoutSec;

  if (timer_settime(timerid, 0, &its, nullptr) == -1) {
    // This only gets called in the child, so we can fatal without impacting
    // the app.
    PLOG(FATAL) << "failed to arm watchdog timer";
  }
}

bool StartsWith(const std::string& str, const std::string& prefix) {
  return str.compare(0, prefix.length(), prefix) == 0;
}

// Sample entries that match one of the following:
//   start with /system/
//   start with /vendor/
//   start with /data/app/
//   contain "extracted in memory from Y", where Y matches any of the above
bool ShouldSampleSmapsEntry(const perfetto::profiling::SmapsEntry& e) {
  if (StartsWith(e.pathname, "/system/") || StartsWith(e.pathname, "/vendor/") ||
      StartsWith(e.pathname, "/data/app/")) {
    return true;
  }
  if (StartsWith(e.pathname, "[anon:")) {
    if (e.pathname.find("extracted in memory from /system/") != std::string::npos) {
      return true;
    }
    if (e.pathname.find("extracted in memory from /vendor/") != std::string::npos) {
      return true;
    }
    if (e.pathname.find("extracted in memory from /data/app/") != std::string::npos) {
      return true;
    }
  }
  return false;
}

class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> {
 public:
  constexpr static perfetto::BufferExhaustedPolicy kBufferExhaustedPolicy =
      perfetto::BufferExhaustedPolicy::kStall;
  void OnSetup(const SetupArgs& args) override {
    uint64_t normalized_cfg_tracing_session_id =
        args.config->tracing_session_id() % std::numeric_limits<int32_t>::max();
    if (requested_tracing_session_id < 0) {
      LOG(ERROR) << "invalid requested tracing session id " << requested_tracing_session_id;
      return;
    }
    if (static_cast<uint64_t>(requested_tracing_session_id) != normalized_cfg_tracing_session_id) {
      return;
    }

    // This is on the heap as it triggers -Wframe-larger-than.
    std::unique_ptr<perfetto::protos::pbzero::JavaHprofConfig::Decoder> cfg(
        new perfetto::protos::pbzero::JavaHprofConfig::Decoder(
            args.config->java_hprof_config_raw()));

    dump_smaps_ = cfg->dump_smaps();
    for (auto it = cfg->ignored_types(); it; ++it) {
      std::string name = (*it).ToStdString();
      ignored_types_.emplace_back(std::move(name));
    }
    // This tracing session ID matches the requesting tracing session ID, so we know heapprofd
    // has verified it targets this process.
    enabled_ = true;
  }

  bool dump_smaps() { return dump_smaps_; }
  bool enabled() { return enabled_; }

  void OnStart(const StartArgs&) override {
    if (!enabled()) {
      return;
    }
    art::MutexLock lk(art_thread(), GetStateMutex());
    if (g_state == State::kWaitForStart) {
      g_state = State::kStart;
      GetStateCV().Broadcast(art_thread());
    }
  }

  // This datasource can be used with a trace config with a short duration_ms
  // but a long datasource_stop_timeout_ms. In that case, OnStop is called (in
  // general) before the dump is done. We then handle the stop asynchronously,
  // and notify the tracing service once we are done.
  // In case OnStop is called after the dump is done (but before the process
  // has exited), we just acknowledge the request.
  void OnStop(const StopArgs& a) override {
    art::MutexLock lk(art_thread(), finish_mutex_);
    if (is_finished_) {
      return;
    }
    is_stopped_ = true;
    async_stop_ = std::move(a.HandleStopAsynchronously());
  }

  static art::Thread* art_thread() {
    // TODO(fmayer): Attach the Perfetto producer thread to ART and give it a name. This is
    // not trivial, we cannot just attach the first time this method is called, because
    // AttachCurrentThread deadlocks with the ConditionVariable::Wait in WaitForDataSource.
    //
    // We should attach the thread as soon as the Client API spawns it, but that needs more
    // complicated plumbing.
    return nullptr;
  }

  std::vector<std::string> ignored_types() { return ignored_types_; }

  void Finish() {
    art::MutexLock lk(art_thread(), finish_mutex_);
    if (is_stopped_) {
      async_stop_();
    } else {
      is_finished_ = true;
    }
  }

 private:
  bool enabled_ = false;
  bool dump_smaps_ = false;
  std::vector<std::string> ignored_types_;
  static art::Thread* self_;

  art::Mutex finish_mutex_{"perfetto_hprof_ds_mutex", art::LockLevel::kGenericBottomLock};
  bool is_finished_ = false;
  bool is_stopped_ = false;
  std::function<void()> async_stop_;
};

art::Thread* JavaHprofDataSource::self_ = nullptr;


void WaitForDataSource(art::Thread* self) {
  perfetto::TracingInitArgs args;
  args.backends = perfetto::BackendType::kSystemBackend;
  perfetto::Tracing::Initialize(args);

  perfetto::DataSourceDescriptor dsd;
  dsd.set_name("android.java_hprof");
  dsd.set_will_notify_on_stop(true);
  JavaHprofDataSource::Register(dsd);

  LOG(INFO) << "waiting for data source";

  art::MutexLock lk(self, GetStateMutex());
  while (g_state != State::kStart) {
    GetStateCV().Wait(self);
  }
}

class Writer {
 public:
  Writer(pid_t parent_pid, JavaHprofDataSource::TraceContext* ctx, uint64_t timestamp)
      : parent_pid_(parent_pid), ctx_(ctx), timestamp_(timestamp),
        last_written_(ctx_->written()) {}

  // Return whether the next call to GetHeapGraph will create a new TracePacket.
  bool will_create_new_packet() {
    return !heap_graph_ || ctx_->written() - last_written_ > kPacketSizeThreshold;
  }

  perfetto::protos::pbzero::HeapGraph* GetHeapGraph() {
    if (will_create_new_packet()) {
      CreateNewHeapGraph();
    }
    return heap_graph_;
  }

  void CreateNewHeapGraph() {
    if (heap_graph_) {
      heap_graph_->set_continued(true);
    }
    Finalize();

    uint64_t written = ctx_->written();

    trace_packet_ = ctx_->NewTracePacket();
    trace_packet_->set_timestamp(timestamp_);
    heap_graph_ = trace_packet_->set_heap_graph();
    heap_graph_->set_pid(parent_pid_);
    heap_graph_->set_index(index_++);

    last_written_ = written;
  }

  void Finalize() {
    if (trace_packet_) {
      trace_packet_->Finalize();
    }
    heap_graph_ = nullptr;
  }

  ~Writer() { Finalize(); }

 private:
  const pid_t parent_pid_;
  JavaHprofDataSource::TraceContext* const ctx_;
  const uint64_t timestamp_;

  uint64_t last_written_ = 0;

  perfetto::DataSource<JavaHprofDataSource>::TraceContext::TracePacketHandle
      trace_packet_;
  perfetto::protos::pbzero::HeapGraph* heap_graph_ = nullptr;

  uint64_t index_ = 0;
};
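
// Illustrative use of Writer: callers just ask for the current HeapGraph message
// and Writer splits the output into multiple TracePackets as needed, e.g.
//
//   Writer writer(pid, &ctx, timestamp);
//   writer.GetHeapGraph()->add_objects();  // may transparently start a new packet
//   writer.Finalize();
//
// Every non-final packet of a split graph is marked with set_continued(true);
// all packets share the same timestamp and are ordered by the index field.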

class ReferredObjectsFinder {
 public:
  explicit ReferredObjectsFinder(
      std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects,
      art::mirror::Object** min_nonnull_ptr)
      : referred_objects_(referred_objects), min_nonnull_ptr_(min_nonnull_ptr) {}

  // For art::mirror::Object::VisitReferences.
  void operator()(art::ObjPtr<art::mirror::Object> obj, art::MemberOffset offset,
                  bool is_static) const
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    if (offset.Uint32Value() == art::mirror::Object::ClassOffset().Uint32Value()) {
      // Skip shadow$klass pointer.
      return;
    }
    art::mirror::Object* ref = obj->GetFieldObject<art::mirror::Object>(offset);
    art::ArtField* field;
    if (is_static) {
      field = art::ArtField::FindStaticFieldWithOffset(obj->AsClass(), offset.Uint32Value());
    } else {
      field = art::ArtField::FindInstanceFieldWithOffset(obj->GetClass(), offset.Uint32Value());
    }
    std::string field_name = "";
    if (field != nullptr) {
      field_name = field->PrettyField(/*with_type=*/true);
    }
    referred_objects_->emplace_back(std::move(field_name), ref);
    if (!*min_nonnull_ptr_ || (ref && *min_nonnull_ptr_ > ref)) {
      *min_nonnull_ptr_ = ref;
    }
  }

  void VisitRootIfNonNull(art::mirror::CompressedReference<art::mirror::Object>* root
                              ATTRIBUTE_UNUSED) const {}
  void VisitRoot(art::mirror::CompressedReference<art::mirror::Object>* root
                     ATTRIBUTE_UNUSED) const {}

 private:
  // We can use a raw Object* pointer here, because there are no concurrent GC threads after the
  // fork.
  std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects_;
  art::mirror::Object** min_nonnull_ptr_;
};

class RootFinder : public art::SingleRootVisitor {
 public:
  explicit RootFinder(
      std::map<art::RootType, std::vector<art::mirror::Object*>>* root_objects)
      : root_objects_(root_objects) {}

  void VisitRoot(art::mirror::Object* root, const art::RootInfo& info) override {
    (*root_objects_)[info.GetType()].emplace_back(root);
  }

 private:
  // We can use a raw Object* pointer here, because there are no concurrent GC threads after the
  // fork.
  std::map<art::RootType, std::vector<art::mirror::Object*>>* root_objects_;
};

perfetto::protos::pbzero::HeapGraphRoot::Type ToProtoType(art::RootType art_type) {
  using perfetto::protos::pbzero::HeapGraphRoot;
  switch (art_type) {
    case art::kRootUnknown:
      return HeapGraphRoot::ROOT_UNKNOWN;
    case art::kRootJNIGlobal:
      return HeapGraphRoot::ROOT_JNI_GLOBAL;
    case art::kRootJNILocal:
      return HeapGraphRoot::ROOT_JNI_LOCAL;
    case art::kRootJavaFrame:
      return HeapGraphRoot::ROOT_JAVA_FRAME;
    case art::kRootNativeStack:
      return HeapGraphRoot::ROOT_NATIVE_STACK;
    case art::kRootStickyClass:
      return HeapGraphRoot::ROOT_STICKY_CLASS;
    case art::kRootThreadBlock:
      return HeapGraphRoot::ROOT_THREAD_BLOCK;
    case art::kRootMonitorUsed:
      return HeapGraphRoot::ROOT_MONITOR_USED;
    case art::kRootThreadObject:
      return HeapGraphRoot::ROOT_THREAD_OBJECT;
    case art::kRootInternedString:
      return HeapGraphRoot::ROOT_INTERNED_STRING;
    case art::kRootFinalizing:
      return HeapGraphRoot::ROOT_FINALIZING;
    case art::kRootDebugger:
      return HeapGraphRoot::ROOT_DEBUGGER;
    case art::kRootReferenceCleanup:
      return HeapGraphRoot::ROOT_REFERENCE_CLEANUP;
    case art::kRootVMInternal:
      return HeapGraphRoot::ROOT_VM_INTERNAL;
    case art::kRootJNIMonitor:
      return HeapGraphRoot::ROOT_JNI_MONITOR;
  }
}

perfetto::protos::pbzero::HeapGraphType::Kind ProtoClassKind(uint32_t class_flags) {
  using perfetto::protos::pbzero::HeapGraphType;
  switch (class_flags) {
    case art::mirror::kClassFlagNormal:
      return HeapGraphType::KIND_NORMAL;
    case art::mirror::kClassFlagNoReferenceFields:
      return HeapGraphType::KIND_NOREFERENCES;
    case art::mirror::kClassFlagString | art::mirror::kClassFlagNoReferenceFields:
      return HeapGraphType::KIND_STRING;
    case art::mirror::kClassFlagObjectArray:
      return HeapGraphType::KIND_ARRAY;
    case art::mirror::kClassFlagClass:
      return HeapGraphType::KIND_CLASS;
    case art::mirror::kClassFlagClassLoader:
      return HeapGraphType::KIND_CLASSLOADER;
    case art::mirror::kClassFlagDexCache:
      return HeapGraphType::KIND_DEXCACHE;
    case art::mirror::kClassFlagSoftReference:
      return HeapGraphType::KIND_SOFT_REFERENCE;
    case art::mirror::kClassFlagWeakReference:
      return HeapGraphType::KIND_WEAK_REFERENCE;
    case art::mirror::kClassFlagFinalizerReference:
      return HeapGraphType::KIND_FINALIZER_REFERENCE;
    case art::mirror::kClassFlagPhantomReference:
      return HeapGraphType::KIND_PHANTOM_REFERENCE;
    default:
      return HeapGraphType::KIND_UNKNOWN;
  }
}

std::string PrettyType(art::mirror::Class* klass) NO_THREAD_SAFETY_ANALYSIS {
  if (klass == nullptr) {
    return "(raw)";
  }
  std::string temp;
  std::string result(art::PrettyDescriptor(klass->GetDescriptor(&temp)));
  return result;
}

void DumpSmaps(JavaHprofDataSource::TraceContext* ctx) {
  FILE* smaps = fopen("/proc/self/smaps", "r");
  if (smaps != nullptr) {
    auto trace_packet = ctx->NewTracePacket();
    auto* smaps_packet = trace_packet->set_smaps_packet();
    smaps_packet->set_pid(getpid());
    perfetto::profiling::ParseSmaps(smaps,
        [&smaps_packet](const perfetto::profiling::SmapsEntry& e) {
      if (ShouldSampleSmapsEntry(e)) {
        auto* smaps_entry = smaps_packet->add_entries();
        smaps_entry->set_path(e.pathname);
        smaps_entry->set_size_kb(e.size_kb);
        smaps_entry->set_private_dirty_kb(e.private_dirty_kb);
        smaps_entry->set_swap_kb(e.swap_kb);
      }
    });
    fclose(smaps);
  } else {
    PLOG(ERROR) << "failed to open smaps";
  }
}

uint64_t GetObjectId(const art::mirror::Object* obj) {
  return reinterpret_cast<uint64_t>(obj) / std::alignment_of<art::mirror::Object>::value;
}
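
// Illustration (assuming, say, 8-byte object alignment): objects at addresses
// 0x7000, 0x7008 and 0x7010 map to IDs 0xE00, 0xE01 and 0xE02. Dividing by the
// alignment keeps IDs dense, which makes the varint and delta encodings below
// cheaper.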

template <typename F>
void ForInstanceReferenceField(art::mirror::Class* klass, F fn) NO_THREAD_SAFETY_ANALYSIS {
  for (art::ArtField& af : klass->GetIFields()) {
    if (af.IsPrimitiveType() ||
        af.GetOffset().Uint32Value() == art::mirror::Object::ClassOffset().Uint32Value()) {
      continue;
    }
    fn(af.GetOffset());
  }
}

bool IsIgnored(const std::vector<std::string>& ignored_types,
               art::mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS {
  if (obj->IsClass()) {
    return false;
  }
  art::mirror::Class* klass = obj->GetClass();
  return std::find(ignored_types.begin(), ignored_types.end(), PrettyType(klass)) !=
         ignored_types.end();
}

size_t EncodedSize(uint64_t n) {
  if (n == 0) return 1;
  return 1 + static_cast<size_t>(art::MostSignificantBit(n)) / 7;
}
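
// EncodedSize is the length of n as a proto varint (7 payload bits per byte):
// EncodedSize(0) == 1, EncodedSize(127) == 1, EncodedSize(128) == 2, and
// EncodedSize(1 << 21) == 4. This is the quantity the base_obj_id rebasing in
// DumpPerfetto trades off against.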

void DumpPerfetto(art::Thread* self) {
  pid_t parent_pid = getpid();
  LOG(INFO) << "preparing to dump heap for " << parent_pid;

  // Need to take a heap dump while GC isn't running. See the comment in
  // Heap::VisitObjects(). Also we need the critical section to avoid visiting
  // the same object twice. See b/34967844.
  //
  // We need to do this before the fork, because otherwise it can deadlock
  // waiting for the GC, as all other threads get terminated by the clone, but
  // their locks are not released.
  // This does not perfectly solve all fork-related issues, as there could still be threads that
  // are unaffected by ScopedSuspendAll and in a non-fork-friendly situation
  // (e.g. inside a malloc holding a lock). This situation is quite rare, and in that case we will
  // hit the watchdog in the grand-child process if it gets stuck.
  std::optional<art::gc::ScopedGCCriticalSection> gcs(std::in_place, self, art::gc::kGcCauseHprof,
                                                      art::gc::kCollectorTypeHprof);

  std::optional<art::ScopedSuspendAll> ssa(std::in_place, __FUNCTION__, /* long_suspend=*/ true);

  pid_t pid = fork();
  if (pid == -1) {
    // Fork error.
    PLOG(ERROR) << "fork";
    return;
  }
  if (pid != 0) {
    // Parent
    // Stop the thread suspension as soon as possible to allow the rest of the application to
    // continue while we waitpid here.
    ssa.reset();
    gcs.reset();
    for (size_t i = 0;; ++i) {
      if (i == 1000) {
        // The child hasn't exited for 1 second (and all it was supposed to do was fork itself).
        // Give up and SIGKILL it. The next waitpid should succeed.
        LOG(ERROR) << "perfetto_hprof child timed out. Sending SIGKILL.";
        kill(pid, SIGKILL);
      }
      // Busy waiting here will introduce some extra latency, but that is okay because we have
      // already unsuspended all other threads. This runs on the perfetto_hprof_listener, which
      // is not needed for progress of the app itself.
      int stat_loc;
      pid_t wait_result = waitpid(pid, &stat_loc, WNOHANG);
      if (wait_result == -1 && errno != EINTR) {
        if (errno != ECHILD) {
          // This hopefully never happens (should only be EINVAL).
          PLOG(FATAL_WITHOUT_ABORT) << "waitpid";
        }
        // If we get ECHILD, the parent process was handling SIGCHLD, or did a wildcard wait.
        // The child is no longer here either way, so that's good enough for us.
        break;
      } else if (wait_result > 0) {
        break;
      } else {  // wait_result == 0 || errno == EINTR.
        usleep(1000);
      }
    }
    return;
  }

  // The following code is only executed by the child of the original process.

  // Uninstall the signal handler, so we don't trigger a profile on it.
  if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) {
    close(g_signal_pipe_fds[0]);
    close(g_signal_pipe_fds[1]);
    PLOG(FATAL) << "Failed to sigaction";
    return;
  }

  // daemon() creates a new process that is the grand-child of the original process, and the
  // intermediate child exits.
  if (daemon(0, 0) == -1) {
    PLOG(FATAL) << "daemon";
  }

  // The following code is only executed by the grand-child of the original process.

  // Make sure that this is the first thing we do after forking, so if anything
  // below hangs, the fork will go away from the watchdog.
  ArmWatchdogOrDie();

  struct timespec ts = {};
  if (clock_gettime(CLOCK_BOOTTIME, &ts) != 0) {
    LOG(FATAL) << "Failed to get boottime.";
  }
  uint64_t timestamp = ts.tv_sec * 1000000000LL + ts.tv_nsec;

  WaitForDataSource(self);

  JavaHprofDataSource::Trace(
      [parent_pid, timestamp](JavaHprofDataSource::TraceContext ctx)
          NO_THREAD_SAFETY_ANALYSIS {
        bool dump_smaps;
        std::vector<std::string> ignored_types;
        {
          auto ds = ctx.GetDataSourceLocked();
          if (!ds || !ds->enabled()) {
            if (ds) ds->Finish();
            LOG(INFO) << "skipping irrelevant data source.";
            return;
          }
          dump_smaps = ds->dump_smaps();
          ignored_types = ds->ignored_types();
        }
        LOG(INFO) << "dumping heap for " << parent_pid;
        if (dump_smaps) {
          DumpSmaps(&ctx);
        }
        Writer writer(parent_pid, &ctx, timestamp);
        // Make sure that intern ID 0 (default proto value for a uint64_t) always maps to ""
        // (default proto value for a string).
        std::map<std::string, uint64_t> interned_fields{{"", 0}};
        std::map<std::string, uint64_t> interned_locations{{"", 0}};
        std::map<uintptr_t, uint64_t> interned_classes{{0, 0}};

        std::map<art::RootType, std::vector<art::mirror::Object*>> root_objects;
        RootFinder rcf(&root_objects);
        art::Runtime::Current()->VisitRoots(&rcf);
        std::unique_ptr<protozero::PackedVarInt> object_ids(
            new protozero::PackedVarInt);
        for (const auto& p : root_objects) {
          const art::RootType root_type = p.first;
          const std::vector<art::mirror::Object*>& children = p.second;
          perfetto::protos::pbzero::HeapGraphRoot* root_proto =
              writer.GetHeapGraph()->add_roots();
          root_proto->set_root_type(ToProtoType(root_type));
          for (art::mirror::Object* obj : children) {
            if (writer.will_create_new_packet()) {
              root_proto->set_object_ids(*object_ids);
              object_ids->Reset();
              root_proto = writer.GetHeapGraph()->add_roots();
              root_proto->set_root_type(ToProtoType(root_type));
            }
            object_ids->Append(GetObjectId(obj));
          }
          root_proto->set_object_ids(*object_ids);
          object_ids->Reset();
        }

        std::unique_ptr<protozero::PackedVarInt> reference_field_ids(
            new protozero::PackedVarInt);
        std::unique_ptr<protozero::PackedVarInt> reference_object_ids(
            new protozero::PackedVarInt);

        uint64_t prev_object_id = 0;

        art::Runtime::Current()->GetHeap()->VisitObjectsPaused(
            [&writer, &interned_fields, &interned_locations, &reference_field_ids,
             &reference_object_ids, &interned_classes, &ignored_types, &prev_object_id](
                art::mirror::Object* obj) REQUIRES_SHARED(art::Locks::mutator_lock_) {
              if (obj->IsClass()) {
                art::mirror::Class* klass = obj->AsClass().Ptr();
                perfetto::protos::pbzero::HeapGraphType* type_proto =
                    writer.GetHeapGraph()->add_types();
                type_proto->set_id(FindOrAppend(&interned_classes,
                                                reinterpret_cast<uintptr_t>(klass)));
                type_proto->set_class_name(PrettyType(klass));
                type_proto->set_location_id(FindOrAppend(&interned_locations,
                                                         klass->GetLocation()));
                type_proto->set_object_size(klass->GetObjectSize());
                type_proto->set_kind(ProtoClassKind(klass->GetClassFlags()));
                type_proto->set_classloader_id(GetObjectId(klass->GetClassLoader().Ptr()));
                if (klass->GetSuperClass().Ptr()) {
                  type_proto->set_superclass_id(
                      FindOrAppend(&interned_classes,
                                   reinterpret_cast<uintptr_t>(klass->GetSuperClass().Ptr())));
                }
                ForInstanceReferenceField(
                    klass, [klass, &reference_field_ids, &interned_fields](
                               art::MemberOffset offset) NO_THREAD_SAFETY_ANALYSIS {
                      auto art_field = art::ArtField::FindInstanceFieldWithOffset(
                          klass, offset.Uint32Value());
                      reference_field_ids->Append(
                          FindOrAppend(&interned_fields, art_field->PrettyField(true)));
                    });
                type_proto->set_reference_field_id(*reference_field_ids);
                reference_field_ids->Reset();
              }

              art::mirror::Class* klass = obj->GetClass();
              uintptr_t class_ptr = reinterpret_cast<uintptr_t>(klass);
              // We need to synthesize a new type for Class<Foo>, which does not exist
              // in the runtime. Otherwise, all the static members of all classes would be
              // attributed to java.lang.Class.
              if (klass->IsClassClass()) {
                CHECK(obj->IsClass());
                perfetto::protos::pbzero::HeapGraphType* type_proto =
                    writer.GetHeapGraph()->add_types();
                // All pointers are at least multiples of two, so this way we can make sure
                // we are not colliding with a real class.
                class_ptr = reinterpret_cast<uintptr_t>(obj) | 1;
                auto class_id = FindOrAppend(&interned_classes, class_ptr);
                type_proto->set_id(class_id);
                type_proto->set_class_name(obj->PrettyTypeOf());
                type_proto->set_location_id(FindOrAppend(&interned_locations,
                                                         obj->AsClass()->GetLocation()));
              }

              if (IsIgnored(ignored_types, obj)) {
                return;
              }

              auto class_id = FindOrAppend(&interned_classes, class_ptr);

              uint64_t object_id = GetObjectId(obj);
              perfetto::protos::pbzero::HeapGraphObject* object_proto =
                  writer.GetHeapGraph()->add_objects();
              if (prev_object_id && prev_object_id < object_id) {
                object_proto->set_id_delta(object_id - prev_object_id);
              } else {
                object_proto->set_id(object_id);
              }
              prev_object_id = object_id;
              object_proto->set_type_id(class_id);

              // Arrays / strings are magic and have an instance dependent size.
              if (obj->SizeOf() != klass->GetObjectSize())
                object_proto->set_self_size(obj->SizeOf());

              std::vector<std::pair<std::string, art::mirror::Object*>>
                  referred_objects;
              art::mirror::Object* min_nonnull_ptr = nullptr;
              ReferredObjectsFinder objf(&referred_objects, &min_nonnull_ptr);

              const bool emit_field_ids =
                  klass->GetClassFlags() != art::mirror::kClassFlagObjectArray &&
                  klass->GetClassFlags() != art::mirror::kClassFlagNormal;
              if (klass->GetClassFlags() != art::mirror::kClassFlagNormal) {
                obj->VisitReferences(objf, art::VoidFunctor());
              } else {
                for (art::mirror::Class* cls = klass; cls != nullptr;
                     cls = cls->GetSuperClass().Ptr()) {
                  ForInstanceReferenceField(
                      cls, [obj, objf](art::MemberOffset offset) NO_THREAD_SAFETY_ANALYSIS {
                        objf(art::ObjPtr<art::mirror::Object>(obj), offset,
                             /*is_static=*/false);
                      });
                }
              }

              uint64_t bytes_saved = 0;
              uint64_t base_obj_id = GetObjectId(min_nonnull_ptr);
              if (base_obj_id) {
                // We need to decrement the base for object ids so that we can tell apart
                // null references.
                base_obj_id--;
              }
              if (base_obj_id) {
                for (auto& p : referred_objects) {
                  art::mirror::Object*& referred_obj = p.second;
                  if (!referred_obj || IsIgnored(ignored_types, referred_obj)) {
                    referred_obj = nullptr;
                    continue;
                  }
                  uint64_t referred_obj_id = GetObjectId(referred_obj);
                  bytes_saved +=
                      EncodedSize(referred_obj_id) - EncodedSize(referred_obj_id - base_obj_id);
                }
              }

              // +1 for storing the field id.
              if (bytes_saved <= EncodedSize(base_obj_id) + 1) {
                // Subtracting the base ptr gains fewer bytes than it takes to store it.
                base_obj_id = 0;
              }
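
              // Worked example (illustrative numbers): if the referred-object IDs are
              // around 2^28 (4 varint bytes each) and rebasing brings the deltas below
              // 2^14 (2 varint bytes each), every reference saves 2 bytes; rebasing then
              // pays off as soon as those savings exceed the cost of emitting the base
              // itself (its varint size, plus 1 byte for the proto field id).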

              for (auto& p : referred_objects) {
                const std::string& field_name = p.first;
                art::mirror::Object* referred_obj = p.second;
                if (emit_field_ids) {
                  reference_field_ids->Append(FindOrAppend(&interned_fields, field_name));
                }
                uint64_t referred_obj_id = GetObjectId(referred_obj);
                if (referred_obj_id) {
                  referred_obj_id -= base_obj_id;
                }
                reference_object_ids->Append(referred_obj_id);
              }
              if (emit_field_ids) {
                object_proto->set_reference_field_id(*reference_field_ids);
                reference_field_ids->Reset();
              }
              if (base_obj_id) {
                object_proto->set_reference_field_id_base(base_obj_id);
              }
              object_proto->set_reference_object_id(*reference_object_ids);
              reference_object_ids->Reset();
            });

        for (const auto& p : interned_locations) {
          const std::string& str = p.first;
          uint64_t id = p.second;

          perfetto::protos::pbzero::InternedString* location_proto =
              writer.GetHeapGraph()->add_location_names();
          location_proto->set_iid(id);
          location_proto->set_str(reinterpret_cast<const uint8_t*>(str.c_str()),
                                  str.size());
        }
        for (const auto& p : interned_fields) {
          const std::string& str = p.first;
          uint64_t id = p.second;

          perfetto::protos::pbzero::InternedString* field_proto =
              writer.GetHeapGraph()->add_field_names();
          field_proto->set_iid(id);
          field_proto->set_str(
              reinterpret_cast<const uint8_t*>(str.c_str()), str.size());
        }

        writer.Finalize();
        ctx.Flush([] {
          {
            art::MutexLock lk(JavaHprofDataSource::art_thread(), GetStateMutex());
            g_state = State::kEnd;
            GetStateCV().Broadcast(JavaHprofDataSource::art_thread());
          }
        });
        // Wait for the Flush that will happen on the Perfetto thread.
        {
          art::MutexLock lk(JavaHprofDataSource::art_thread(), GetStateMutex());
          while (g_state != State::kEnd) {
            GetStateCV().Wait(JavaHprofDataSource::art_thread());
          }
        }
        {
          auto ds = ctx.GetDataSourceLocked();
          if (ds) {
            ds->Finish();
          } else {
            LOG(ERROR) << "datasource timed out (duration_ms + datasource_stop_timeout_ms) "
                          "before dump finished";
          }
        }
      });

  LOG(INFO) << "finished dumping heap for " << parent_pid;
  // Prevent the `atexit` handlers from running. We do not want to call cleanup
  // functions the parent process has registered.
  art::FastExit(0);
}

// The plugin initialization function.
extern "C" bool ArtPlugin_Initialize() {
  if (art::Runtime::Current() == nullptr) {
    return false;
  }
  art::Thread* self = art::Thread::Current();
  {
    art::MutexLock lk(self, GetStateMutex());
    if (g_state != State::kUninitialized) {
      LOG(ERROR) << "perfetto_hprof already initialized. state: " << g_state;
      return false;
    }
    g_state = State::kWaitForListener;
  }

  if (pipe2(g_signal_pipe_fds, O_CLOEXEC) == -1) {
    PLOG(ERROR) << "Failed to pipe";
    return false;
  }

  struct sigaction act = {};
  act.sa_flags = SA_SIGINFO | SA_RESTART;
  act.sa_sigaction = [](int, siginfo_t* si, void*) {
    requested_tracing_session_id = si->si_value.sival_int;
    if (write(g_signal_pipe_fds[1], kByte, sizeof(kByte)) == -1) {
      PLOG(ERROR) << "Failed to trigger heap dump";
    }
  };
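
  // The dump is triggered by sending kJavaHeapprofdSignal to this process with the
  // tracing session id as the signal payload. A minimal sketch of a sender, assuming
  // a known target_pid and session_id:
  //
  //   union sigval sv;
  //   sv.sival_int = static_cast<int>(session_id);
  //   sigqueue(target_pid, __SIGRTMIN + 6, sv);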

  // TODO(fmayer): We can probably use the SignalCatcher thread here to not
  // have an idle thread.
  if (sigaction(kJavaHeapprofdSignal, &act, &g_orig_act) != 0) {
    close(g_signal_pipe_fds[0]);
    close(g_signal_pipe_fds[1]);
    PLOG(ERROR) << "Failed to sigaction";
    return false;
  }

  std::thread th([] {
    art::Runtime* runtime = art::Runtime::Current();
    if (!runtime) {
      LOG(FATAL_WITHOUT_ABORT) << "no runtime in perfetto_hprof_listener";
      return;
    }
    if (!runtime->AttachCurrentThread("perfetto_hprof_listener", /*as_daemon=*/ true,
                                      runtime->GetSystemThreadGroup(), /*create_peer=*/ false)) {
      LOG(ERROR) << "failed to attach thread.";
      {
        art::MutexLock lk(nullptr, GetStateMutex());
        g_state = State::kUninitialized;
        GetStateCV().Broadcast(nullptr);
      }

      return;
    }
    art::Thread* self = art::Thread::Current();
    if (!self) {
      LOG(FATAL_WITHOUT_ABORT) << "no thread in perfetto_hprof_listener";
      return;
    }
    {
      art::MutexLock lk(self, GetStateMutex());
      if (g_state == State::kWaitForListener) {
        g_state = State::kWaitForStart;
        GetStateCV().Broadcast(self);
      }
    }
    char buf[1];
    for (;;) {
      int res;
      do {
        res = read(g_signal_pipe_fds[0], buf, sizeof(buf));
      } while (res == -1 && errno == EINTR);

      if (res <= 0) {
        if (res == -1) {
          PLOG(ERROR) << "failed to read";
        }
        close(g_signal_pipe_fds[0]);
        return;
      }

      perfetto_hprof::DumpPerfetto(self);
    }
  });
  th.detach();

  return true;
}

extern "C" bool ArtPlugin_Deinitialize() {
  if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) {
    PLOG(ERROR) << "failed to reset signal handler";
    // We cannot close the pipe if the signal handler wasn't unregistered,
    // to avoid receiving SIGPIPE.
    return false;
  }
  close(g_signal_pipe_fds[1]);

  art::Thread* self = art::Thread::Current();
  art::MutexLock lk(self, GetStateMutex());
  // Wait until after the thread was registered to the runtime. This is so
  // we do not attempt to register it with the runtime after it had been torn
  // down (ArtPlugin_Deinitialize gets called in the Runtime dtor).
  while (g_state == State::kWaitForListener) {
    GetStateCV().Wait(art::Thread::Current());
  }
  g_state = State::kUninitialized;
  GetStateCV().Broadcast(self);
  return true;
}

}  // namespace perfetto_hprof

namespace perfetto {

PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(perfetto_hprof::JavaHprofDataSource);

}  // namespace perfetto