1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "perfetto_hprof"
18 
19 #include "perfetto_hprof.h"
20 
21 #include <android-base/logging.h>
22 #include <base/fast_exit.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <sched.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <sys/un.h>
31 #include <sys/wait.h>
32 #include <thread>
33 #include <time.h>
34 
35 #include <limits>
36 #include <optional>
37 #include <type_traits>
38 
39 #include "gc/heap-visit-objects-inl.h"
40 #include "gc/heap.h"
41 #include "gc/scoped_gc_critical_section.h"
42 #include "mirror/object-refvisitor-inl.h"
43 #include "nativehelper/scoped_local_ref.h"
44 #include "perfetto/profiling/parse_smaps.h"
45 #include "perfetto/trace/interned_data/interned_data.pbzero.h"
46 #include "perfetto/trace/profiling/heap_graph.pbzero.h"
47 #include "perfetto/trace/profiling/profile_common.pbzero.h"
48 #include "perfetto/trace/profiling/smaps.pbzero.h"
49 #include "perfetto/config/profiling/java_hprof_config.pbzero.h"
50 #include "perfetto/protozero/packed_repeated_fields.h"
51 #include "perfetto/tracing.h"
52 #include "runtime-inl.h"
53 #include "runtime_callbacks.h"
54 #include "scoped_thread_state_change-inl.h"
55 #include "thread_list.h"
56 #include "well_known_classes.h"
57 #include "dex/descriptors_names.h"
58 
59 // There are three threads involved in this:
// * listener thread: this is idle in the background when this plugin gets loaded, and waits
//   for data on g_signal_pipe_fds.
62 // * signal thread: an arbitrary thread that handles the signal and writes data to
63 //   g_signal_pipe_fds.
64 // * perfetto producer thread: once the signal is received, the app forks. In the newly forked
65 //   child, the Perfetto Client API spawns a thread to communicate with traced.
66 
67 namespace perfetto_hprof {
68 
// Real-time signal associated with Java heap dump requests. DumpPerfetto restores the original
// disposition (g_orig_act) for this signal in the forked child.
constexpr int kJavaHeapprofdSignal = __SIGRTMIN + 6;
// Delay, in seconds, after which the watchdog timer armed by ArmWatchdogOrDie() expires.
constexpr time_t kWatchdogTimeoutSec = 120;
// This needs to be lower than the maximum acceptable chunk size, because this
// is checked *before* writing another submessage. We conservatively assume
// submessages can be up to 100k here for a 500k chunk size.
// DropBox has a 500k chunk limit, and each chunk needs to parse as a proto.
constexpr uint32_t kPacketSizeThreshold = 400000;
// One-byte payload. NOTE(review): presumably the byte written to g_signal_pipe_fds to wake the
// listener thread; its use is outside the visible part of the file, so confirm.
constexpr char kByte[1] = {'x'};
GetStateMutex()77 static art::Mutex& GetStateMutex() {
78   static art::Mutex state_mutex("perfetto_hprof_state_mutex", art::LockLevel::kGenericBottomLock);
79   return state_mutex;
80 }
81 
GetStateCV()82 static art::ConditionVariable& GetStateCV() {
83   static art::ConditionVariable state_cv("perfetto_hprof_state_cv", GetStateMutex());
84   return state_cv;
85 }
86 
// Tracing session ID that a data source config has to match (after normalization) before
// JavaHprofDataSource::OnSetup enables the dump. NOTE(review): the code that assigns this value
// is outside the visible part of the file.
static int requested_tracing_session_id = 0;
// Current dump state. Read and written under GetStateMutex() by JavaHprofDataSource::OnStart and
// WaitForDataSource.
static State g_state = State::kUninitialized;

// Pipe to signal from the signal handler into a worker thread that handles the
// dump requests.
int g_signal_pipe_fds[2];
// Signal disposition that DumpPerfetto re-installs for kJavaHeapprofdSignal in the forked child.
// NOTE(review): presumably captured when the handler is installed; that code is not visible here.
static struct sigaction g_orig_act = {};
94 
// Returns the ID interned for `s` in `*m`.
//
// If `s` is not in the map yet it is inserted with the next free ID, which is the number of
// entries the map held before the insertion. Existing entries are never modified.
template <typename T>
uint64_t FindOrAppend(std::map<T, uint64_t>* m, const T& s) {
  // try_emplace leaves the map untouched when the key already exists, so one lookup serves both
  // the hit and the miss case (instead of find() followed by emplace()). The candidate ID
  // m->size() is evaluated before the insertion happens.
  return m->try_emplace(s, m->size()).first->second;
}
103 
ArmWatchdogOrDie()104 void ArmWatchdogOrDie() {
105   timer_t timerid{};
106   struct sigevent sev {};
107   sev.sigev_notify = SIGEV_SIGNAL;
108   sev.sigev_signo = SIGKILL;
109 
110   if (timer_create(CLOCK_MONOTONIC, &sev, &timerid) == -1) {
111     // This only gets called in the child, so we can fatal without impacting
112     // the app.
113     PLOG(FATAL) << "failed to create watchdog timer";
114   }
115 
116   struct itimerspec its {};
117   its.it_value.tv_sec = kWatchdogTimeoutSec;
118 
119   if (timer_settime(timerid, 0, &its, nullptr) == -1) {
120     // This only gets called in the child, so we can fatal without impacting
121     // the app.
122     PLOG(FATAL) << "failed to arm watchdog timer";
123   }
124 }
125 
// Returns true iff `str` begins with `prefix`. An empty prefix matches every string.
bool StartsWith(const std::string& str, const std::string& prefix) {
  // Searching backwards from index 0 can only ever match at index 0, which makes this a pure
  // prefix test.
  return str.rfind(prefix, 0) == 0;
}
129 
130 // Sample entries that match one of the following
131 // start with /system/
132 // start with /vendor/
133 // start with /data/app/
134 // contains "extracted in memory from Y", where Y matches any of the above
ShouldSampleSmapsEntry(const perfetto::profiling::SmapsEntry & e)135 bool ShouldSampleSmapsEntry(const perfetto::profiling::SmapsEntry& e) {
136   if (StartsWith(e.pathname, "/system/") || StartsWith(e.pathname, "/vendor/") ||
137       StartsWith(e.pathname, "/data/app/")) {
138     return true;
139   }
140   if (StartsWith(e.pathname, "[anon:")) {
141     if (e.pathname.find("extracted in memory from /system/") != std::string::npos) {
142       return true;
143     }
144     if (e.pathname.find("extracted in memory from /vendor/") != std::string::npos) {
145       return true;
146     }
147     if (e.pathname.find("extracted in memory from /data/app/") != std::string::npos) {
148       return true;
149     }
150   }
151   return false;
152 }
153 
154 class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> {
155  public:
156   constexpr static perfetto::BufferExhaustedPolicy kBufferExhaustedPolicy =
157     perfetto::BufferExhaustedPolicy::kStall;
OnSetup(const SetupArgs & args)158   void OnSetup(const SetupArgs& args) override {
159     uint64_t normalized_cfg_tracing_session_id =
160       args.config->tracing_session_id() % std::numeric_limits<int32_t>::max();
161     if (requested_tracing_session_id < 0) {
162       LOG(ERROR) << "invalid requested tracing session id " << requested_tracing_session_id;
163       return;
164     }
165     if (static_cast<uint64_t>(requested_tracing_session_id) != normalized_cfg_tracing_session_id) {
166       return;
167     }
168 
169     // This is on the heap as it triggers -Wframe-larger-than.
170     std::unique_ptr<perfetto::protos::pbzero::JavaHprofConfig::Decoder> cfg(
171         new perfetto::protos::pbzero::JavaHprofConfig::Decoder(
172           args.config->java_hprof_config_raw()));
173 
174     dump_smaps_ = cfg->dump_smaps();
175     for (auto it = cfg->ignored_types(); it; ++it) {
176       std::string name = (*it).ToStdString();
177       ignored_types_.emplace_back(std::move(name));
178     }
179     // This tracing session ID matches the requesting tracing session ID, so we know heapprofd
180     // has verified it targets this process.
181     enabled_ = true;
182   }
183 
dump_smaps()184   bool dump_smaps() { return dump_smaps_; }
enabled()185   bool enabled() { return enabled_; }
186 
OnStart(const StartArgs &)187   void OnStart(const StartArgs&) override {
188     if (!enabled()) {
189       return;
190     }
191     art::MutexLock lk(art_thread(), GetStateMutex());
192     if (g_state == State::kWaitForStart) {
193       g_state = State::kStart;
194       GetStateCV().Broadcast(art_thread());
195     }
196   }
197 
198   // This datasource can be used with a trace config with a short duration_ms
199   // but a long datasource_stop_timeout_ms. In that case, OnStop is called (in
200   // general) before the dump is done. In that case, we handle the stop
201   // asynchronously, and notify the tracing service once we are done.
202   // In case OnStop is called after the dump is done (but before the process)
203   // has exited, we just acknowledge the request.
OnStop(const StopArgs & a)204   void OnStop(const StopArgs& a) override {
205     art::MutexLock lk(art_thread(), finish_mutex_);
206     if (is_finished_) {
207       return;
208     }
209     is_stopped_ = true;
210     async_stop_ = std::move(a.HandleStopAsynchronously());
211   }
212 
art_thread()213   static art::Thread* art_thread() {
214     // TODO(fmayer): Attach the Perfetto producer thread to ART and give it a name. This is
215     // not trivial, we cannot just attach the first time this method is called, because
216     // AttachCurrentThread deadlocks with the ConditionVariable::Wait in WaitForDataSource.
217     //
218     // We should attach the thread as soon as the Client API spawns it, but that needs more
219     // complicated plumbing.
220     return nullptr;
221   }
222 
ignored_types()223   std::vector<std::string> ignored_types() { return ignored_types_; }
224 
Finish()225   void Finish() {
226     art::MutexLock lk(art_thread(), finish_mutex_);
227     if (is_stopped_) {
228       async_stop_();
229     } else {
230       is_finished_ = true;
231     }
232   }
233 
234  private:
235   bool enabled_ = false;
236   bool dump_smaps_ = false;
237   std::vector<std::string> ignored_types_;
238   static art::Thread* self_;
239 
240   art::Mutex finish_mutex_{"perfetto_hprof_ds_mutex", art::LockLevel::kGenericBottomLock};
241   bool is_finished_ = false;
242   bool is_stopped_ = false;
243   std::function<void()> async_stop_;
244 };
245 
246 art::Thread* JavaHprofDataSource::self_ = nullptr;
247 
248 
WaitForDataSource(art::Thread * self)249 void WaitForDataSource(art::Thread* self) {
250   perfetto::TracingInitArgs args;
251   args.backends = perfetto::BackendType::kSystemBackend;
252   perfetto::Tracing::Initialize(args);
253 
254   perfetto::DataSourceDescriptor dsd;
255   dsd.set_name("android.java_hprof");
256   dsd.set_will_notify_on_stop(true);
257   JavaHprofDataSource::Register(dsd);
258 
259   LOG(INFO) << "waiting for data source";
260 
261   art::MutexLock lk(self, GetStateMutex());
262   while (g_state != State::kStart) {
263     GetStateCV().Wait(self);
264   }
265 }
266 
267 class Writer {
268  public:
Writer(pid_t parent_pid,JavaHprofDataSource::TraceContext * ctx,uint64_t timestamp)269   Writer(pid_t parent_pid, JavaHprofDataSource::TraceContext* ctx, uint64_t timestamp)
270       : parent_pid_(parent_pid), ctx_(ctx), timestamp_(timestamp),
271         last_written_(ctx_->written()) {}
272 
273   // Return whether the next call to GetHeapGraph will create a new TracePacket.
will_create_new_packet()274   bool will_create_new_packet() {
275     return !heap_graph_ || ctx_->written() - last_written_ > kPacketSizeThreshold;
276   }
277 
GetHeapGraph()278   perfetto::protos::pbzero::HeapGraph* GetHeapGraph() {
279     if (will_create_new_packet()) {
280       CreateNewHeapGraph();
281     }
282     return heap_graph_;
283   }
284 
CreateNewHeapGraph()285   void CreateNewHeapGraph() {
286     if (heap_graph_) {
287       heap_graph_->set_continued(true);
288     }
289     Finalize();
290 
291     uint64_t written = ctx_->written();
292 
293     trace_packet_ = ctx_->NewTracePacket();
294     trace_packet_->set_timestamp(timestamp_);
295     heap_graph_ = trace_packet_->set_heap_graph();
296     heap_graph_->set_pid(parent_pid_);
297     heap_graph_->set_index(index_++);
298 
299     last_written_ = written;
300   }
301 
Finalize()302   void Finalize() {
303     if (trace_packet_) {
304       trace_packet_->Finalize();
305     }
306     heap_graph_ = nullptr;
307   }
308 
~Writer()309   ~Writer() { Finalize(); }
310 
311  private:
312   const pid_t parent_pid_;
313   JavaHprofDataSource::TraceContext* const ctx_;
314   const uint64_t timestamp_;
315 
316   uint64_t last_written_ = 0;
317 
318   perfetto::DataSource<JavaHprofDataSource>::TraceContext::TracePacketHandle
319       trace_packet_;
320   perfetto::protos::pbzero::HeapGraph* heap_graph_ = nullptr;
321 
322   uint64_t index_ = 0;
323 };
324 
325 class ReferredObjectsFinder {
326  public:
ReferredObjectsFinder(std::vector<std::pair<std::string,art::mirror::Object * >> * referred_objects,art::mirror::Object ** min_nonnull_ptr)327   explicit ReferredObjectsFinder(
328       std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects,
329       art::mirror::Object** min_nonnull_ptr)
330       : referred_objects_(referred_objects), min_nonnull_ptr_(min_nonnull_ptr) {}
331 
332   // For art::mirror::Object::VisitReferences.
operator ()(art::ObjPtr<art::mirror::Object> obj,art::MemberOffset offset,bool is_static) const333   void operator()(art::ObjPtr<art::mirror::Object> obj, art::MemberOffset offset,
334                   bool is_static) const
335       REQUIRES_SHARED(art::Locks::mutator_lock_) {
336     if (offset.Uint32Value() == art::mirror::Object::ClassOffset().Uint32Value()) {
337       // Skip shadow$klass pointer.
338       return;
339     }
340     art::mirror::Object* ref = obj->GetFieldObject<art::mirror::Object>(offset);
341     art::ArtField* field;
342     if (is_static) {
343       field = art::ArtField::FindStaticFieldWithOffset(obj->AsClass(), offset.Uint32Value());
344     } else {
345       field = art::ArtField::FindInstanceFieldWithOffset(obj->GetClass(), offset.Uint32Value());
346     }
347     std::string field_name = "";
348     if (field != nullptr) {
349       field_name = field->PrettyField(/*with_type=*/true);
350     }
351     referred_objects_->emplace_back(std::move(field_name), ref);
352     if (!*min_nonnull_ptr_ || (ref && *min_nonnull_ptr_ > ref)) {
353       *min_nonnull_ptr_ = ref;
354     }
355   }
356 
VisitRootIfNonNull(art::mirror::CompressedReference<art::mirror::Object> * root ATTRIBUTE_UNUSED) const357   void VisitRootIfNonNull(art::mirror::CompressedReference<art::mirror::Object>* root
358                               ATTRIBUTE_UNUSED) const {}
VisitRoot(art::mirror::CompressedReference<art::mirror::Object> * root ATTRIBUTE_UNUSED) const359   void VisitRoot(art::mirror::CompressedReference<art::mirror::Object>* root
360                      ATTRIBUTE_UNUSED) const {}
361 
362  private:
363   // We can use a raw Object* pointer here, because there are no concurrent GC threads after the
364   // fork.
365   std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects_;
366   art::mirror::Object** min_nonnull_ptr_;
367 };
368 
369 class RootFinder : public art::SingleRootVisitor {
370  public:
RootFinder(std::map<art::RootType,std::vector<art::mirror::Object * >> * root_objects)371   explicit RootFinder(
372     std::map<art::RootType, std::vector<art::mirror::Object*>>* root_objects)
373       : root_objects_(root_objects) {}
374 
VisitRoot(art::mirror::Object * root,const art::RootInfo & info)375   void VisitRoot(art::mirror::Object* root, const art::RootInfo& info) override {
376     (*root_objects_)[info.GetType()].emplace_back(root);
377   }
378 
379  private:
380   // We can use a raw Object* pointer here, because there are no concurrent GC threads after the
381   // fork.
382   std::map<art::RootType, std::vector<art::mirror::Object*>>* root_objects_;
383 };
384 
ToProtoType(art::RootType art_type)385 perfetto::protos::pbzero::HeapGraphRoot::Type ToProtoType(art::RootType art_type) {
386   using perfetto::protos::pbzero::HeapGraphRoot;
387   switch (art_type) {
388     case art::kRootUnknown:
389       return HeapGraphRoot::ROOT_UNKNOWN;
390     case art::kRootJNIGlobal:
391       return HeapGraphRoot::ROOT_JNI_GLOBAL;
392     case art::kRootJNILocal:
393       return HeapGraphRoot::ROOT_JNI_LOCAL;
394     case art::kRootJavaFrame:
395       return HeapGraphRoot::ROOT_JAVA_FRAME;
396     case art::kRootNativeStack:
397       return HeapGraphRoot::ROOT_NATIVE_STACK;
398     case art::kRootStickyClass:
399       return HeapGraphRoot::ROOT_STICKY_CLASS;
400     case art::kRootThreadBlock:
401       return HeapGraphRoot::ROOT_THREAD_BLOCK;
402     case art::kRootMonitorUsed:
403       return HeapGraphRoot::ROOT_MONITOR_USED;
404     case art::kRootThreadObject:
405       return HeapGraphRoot::ROOT_THREAD_OBJECT;
406     case art::kRootInternedString:
407       return HeapGraphRoot::ROOT_INTERNED_STRING;
408     case art::kRootFinalizing:
409       return HeapGraphRoot::ROOT_FINALIZING;
410     case art::kRootDebugger:
411       return HeapGraphRoot::ROOT_DEBUGGER;
412     case art::kRootReferenceCleanup:
413       return HeapGraphRoot::ROOT_REFERENCE_CLEANUP;
414     case art::kRootVMInternal:
415       return HeapGraphRoot::ROOT_VM_INTERNAL;
416     case art::kRootJNIMonitor:
417       return HeapGraphRoot::ROOT_JNI_MONITOR;
418   }
419 }
420 
ProtoClassKind(uint32_t class_flags)421 perfetto::protos::pbzero::HeapGraphType::Kind ProtoClassKind(uint32_t class_flags) {
422   using perfetto::protos::pbzero::HeapGraphType;
423   switch (class_flags) {
424     case art::mirror::kClassFlagNormal:
425       return HeapGraphType::KIND_NORMAL;
426     case art::mirror::kClassFlagNoReferenceFields:
427       return HeapGraphType::KIND_NOREFERENCES;
428     case art::mirror::kClassFlagString | art::mirror::kClassFlagNoReferenceFields:
429       return HeapGraphType::KIND_STRING;
430     case art::mirror::kClassFlagObjectArray:
431       return HeapGraphType::KIND_ARRAY;
432     case art::mirror::kClassFlagClass:
433       return HeapGraphType::KIND_CLASS;
434     case art::mirror::kClassFlagClassLoader:
435       return HeapGraphType::KIND_CLASSLOADER;
436     case art::mirror::kClassFlagDexCache:
437       return HeapGraphType::KIND_DEXCACHE;
438     case art::mirror::kClassFlagSoftReference:
439       return HeapGraphType::KIND_SOFT_REFERENCE;
440     case art::mirror::kClassFlagWeakReference:
441       return HeapGraphType::KIND_WEAK_REFERENCE;
442     case art::mirror::kClassFlagFinalizerReference:
443       return HeapGraphType::KIND_FINALIZER_REFERENCE;
444     case art::mirror::kClassFlagPhantomReference:
445       return HeapGraphType::KIND_PHANTOM_REFERENCE;
446     default:
447       return HeapGraphType::KIND_UNKNOWN;
448   }
449 }
450 
PrettyType(art::mirror::Class * klass)451 std::string PrettyType(art::mirror::Class* klass) NO_THREAD_SAFETY_ANALYSIS {
452   if (klass == nullptr) {
453     return "(raw)";
454   }
455   std::string temp;
456   std::string result(art::PrettyDescriptor(klass->GetDescriptor(&temp)));
457   return result;
458 }
459 
DumpSmaps(JavaHprofDataSource::TraceContext * ctx)460 void DumpSmaps(JavaHprofDataSource::TraceContext* ctx) {
461   FILE* smaps = fopen("/proc/self/smaps", "r");
462   if (smaps != nullptr) {
463     auto trace_packet = ctx->NewTracePacket();
464     auto* smaps_packet = trace_packet->set_smaps_packet();
465     smaps_packet->set_pid(getpid());
466     perfetto::profiling::ParseSmaps(smaps,
467         [&smaps_packet](const perfetto::profiling::SmapsEntry& e) {
468       if (ShouldSampleSmapsEntry(e)) {
469         auto* smaps_entry = smaps_packet->add_entries();
470         smaps_entry->set_path(e.pathname);
471         smaps_entry->set_size_kb(e.size_kb);
472         smaps_entry->set_private_dirty_kb(e.private_dirty_kb);
473         smaps_entry->set_swap_kb(e.swap_kb);
474       }
475     });
476     fclose(smaps);
477   } else {
478     PLOG(ERROR) << "failed to open smaps";
479   }
480 }
481 
GetObjectId(const art::mirror::Object * obj)482 uint64_t GetObjectId(const art::mirror::Object* obj) {
483   return reinterpret_cast<uint64_t>(obj) / std::alignment_of<art::mirror::Object>::value;
484 }
485 
486 template <typename F>
ForInstanceReferenceField(art::mirror::Class * klass,F fn)487 void ForInstanceReferenceField(art::mirror::Class* klass, F fn) NO_THREAD_SAFETY_ANALYSIS {
488   for (art::ArtField& af : klass->GetIFields()) {
489     if (af.IsPrimitiveType() ||
490         af.GetOffset().Uint32Value() == art::mirror::Object::ClassOffset().Uint32Value()) {
491       continue;
492     }
493     fn(af.GetOffset());
494   }
495 }
496 
IsIgnored(const std::vector<std::string> & ignored_types,art::mirror::Object * obj)497 bool IsIgnored(const std::vector<std::string>& ignored_types,
498                art::mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS {
499   if (obj->IsClass()) {
500     return false;
501   }
502   art::mirror::Class* klass = obj->GetClass();
503   return std::find(ignored_types.begin(), ignored_types.end(), PrettyType(klass)) !=
504          ignored_types.end();
505 }
506 
EncodedSize(uint64_t n)507 size_t EncodedSize(uint64_t n) {
508   if (n == 0) return 1;
509   return 1 + static_cast<size_t>(art::MostSignificantBit(n)) / 7;
510 }
511 
DumpPerfetto(art::Thread * self)512 void DumpPerfetto(art::Thread* self) {
513   pid_t parent_pid = getpid();
514   LOG(INFO) << "preparing to dump heap for " << parent_pid;
515 
516   // Need to take a heap dump while GC isn't running. See the comment in
517   // Heap::VisitObjects(). Also we need the critical section to avoid visiting
518   // the same object twice. See b/34967844.
519   //
520   // We need to do this before the fork, because otherwise it can deadlock
521   // waiting for the GC, as all other threads get terminated by the clone, but
522   // their locks are not released.
523   // This does not perfectly solve all fork-related issues, as there could still be threads that
524   // are unaffected by ScopedSuspendAll and in a non-fork-friendly situation
525   // (e.g. inside a malloc holding a lock). This situation is quite rare, and in that case we will
526   // hit the watchdog in the grand-child process if it gets stuck.
527   std::optional<art::gc::ScopedGCCriticalSection> gcs(std::in_place, self, art::gc::kGcCauseHprof,
528                                                       art::gc::kCollectorTypeHprof);
529 
530   std::optional<art::ScopedSuspendAll> ssa(std::in_place, __FUNCTION__, /* long_suspend=*/ true);
531 
532   pid_t pid = fork();
533   if (pid == -1) {
534     // Fork error.
535     PLOG(ERROR) << "fork";
536     return;
537   }
538   if (pid != 0) {
539     // Parent
540     // Stop the thread suspension as soon as possible to allow the rest of the application to
541     // continue while we waitpid here.
542     ssa.reset();
543     gcs.reset();
544     for (size_t i = 0;; ++i) {
545       if (i == 1000) {
546         // The child hasn't exited for 1 second (and all it was supposed to do was fork itself).
547         // Give up and SIGKILL it. The next waitpid should succeed.
548         LOG(ERROR) << "perfetto_hprof child timed out. Sending SIGKILL.";
549         kill(pid, SIGKILL);
550       }
551       // Busy waiting here will introduce some extra latency, but that is okay because we have
552       // already unsuspended all other threads. This runs on the perfetto_hprof_listener, which
553       // is not needed for progress of the app itself.
554       int stat_loc;
555       pid_t wait_result = waitpid(pid, &stat_loc, WNOHANG);
556       if (wait_result == -1 && errno != EINTR) {
557         if (errno != ECHILD) {
558           // This hopefully never happens (should only be EINVAL).
559           PLOG(FATAL_WITHOUT_ABORT) << "waitpid";
560         }
561         // If we get ECHILD, the parent process was handling SIGCHLD, or did a wildcard wait.
562         // The child is no longer here either way, so that's good enough for us.
563         break;
564       } else if (wait_result > 0) {
565         break;
566       } else {  // wait_result == 0 || errno == EINTR.
567         usleep(1000);
568       }
569     }
570     return;
571   }
572 
573   // The following code is only executed by the child of the original process.
574 
575   // Uninstall signal handler, so we don't trigger a profile on it.
576   if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) {
577     close(g_signal_pipe_fds[0]);
578     close(g_signal_pipe_fds[1]);
579     PLOG(FATAL) << "Failed to sigaction";
580     return;
581   }
582 
583   // Daemon creates a new process that is the grand-child of the original process, and exits.
584   if (daemon(0, 0) == -1) {
585     PLOG(FATAL) << "daemon";
586   }
587 
588   // The following code is only executed by the grand-child of the original process.
589 
590   // Make sure that this is the first thing we do after forking, so if anything
591   // below hangs, the fork will go away from the watchdog.
592   ArmWatchdogOrDie();
593 
594   struct timespec ts = {};
595   if (clock_gettime(CLOCK_BOOTTIME, &ts) != 0) {
596     LOG(FATAL) << "Failed to get boottime.";
597   }
598   uint64_t timestamp = ts.tv_sec * 1000000000LL + ts.tv_nsec;
599 
600   WaitForDataSource(self);
601 
602   JavaHprofDataSource::Trace(
603       [parent_pid, timestamp](JavaHprofDataSource::TraceContext ctx)
604           NO_THREAD_SAFETY_ANALYSIS {
605             bool dump_smaps;
606             std::vector<std::string> ignored_types;
607             {
608               auto ds = ctx.GetDataSourceLocked();
609               if (!ds || !ds->enabled()) {
610                 if (ds) ds->Finish();
611                 LOG(INFO) << "skipping irrelevant data source.";
612                 return;
613               }
614               dump_smaps = ds->dump_smaps();
615               ignored_types = ds->ignored_types();
616             }
617             LOG(INFO) << "dumping heap for " << parent_pid;
618             if (dump_smaps) {
619               DumpSmaps(&ctx);
620             }
621             Writer writer(parent_pid, &ctx, timestamp);
622             // Make sure that intern ID 0 (default proto value for a uint64_t) always maps to ""
623             // (default proto value for a string).
624             std::map<std::string, uint64_t> interned_fields{{"", 0}};
625             std::map<std::string, uint64_t> interned_locations{{"", 0}};
626             std::map<uintptr_t, uint64_t> interned_classes{{0, 0}};
627 
628             std::map<art::RootType, std::vector<art::mirror::Object*>> root_objects;
629             RootFinder rcf(&root_objects);
630             art::Runtime::Current()->VisitRoots(&rcf);
631             std::unique_ptr<protozero::PackedVarInt> object_ids(
632                 new protozero::PackedVarInt);
633             for (const auto& p : root_objects) {
634               const art::RootType root_type = p.first;
635               const std::vector<art::mirror::Object*>& children = p.second;
636               perfetto::protos::pbzero::HeapGraphRoot* root_proto =
637                 writer.GetHeapGraph()->add_roots();
638               root_proto->set_root_type(ToProtoType(root_type));
639               for (art::mirror::Object* obj : children) {
640                 if (writer.will_create_new_packet()) {
641                   root_proto->set_object_ids(*object_ids);
642                   object_ids->Reset();
643                   root_proto = writer.GetHeapGraph()->add_roots();
644                   root_proto->set_root_type(ToProtoType(root_type));
645                 }
646                 object_ids->Append(GetObjectId(obj));
647               }
648               root_proto->set_object_ids(*object_ids);
649               object_ids->Reset();
650             }
651 
652             std::unique_ptr<protozero::PackedVarInt> reference_field_ids(
653                 new protozero::PackedVarInt);
654             std::unique_ptr<protozero::PackedVarInt> reference_object_ids(
655                 new protozero::PackedVarInt);
656 
657             uint64_t prev_object_id = 0;
658 
659             art::Runtime::Current()->GetHeap()->VisitObjectsPaused(
660                 [&writer, &interned_fields, &interned_locations, &reference_field_ids,
661                  &reference_object_ids, &interned_classes, &ignored_types, &prev_object_id](
662                     art::mirror::Object* obj) REQUIRES_SHARED(art::Locks::mutator_lock_) {
663                   if (obj->IsClass()) {
664                     art::mirror::Class* klass = obj->AsClass().Ptr();
665                     perfetto::protos::pbzero::HeapGraphType* type_proto =
666                       writer.GetHeapGraph()->add_types();
667                     type_proto->set_id(FindOrAppend(&interned_classes,
668                           reinterpret_cast<uintptr_t>(klass)));
669                     type_proto->set_class_name(PrettyType(klass));
670                     type_proto->set_location_id(FindOrAppend(&interned_locations,
671                           klass->GetLocation()));
672                     type_proto->set_object_size(klass->GetObjectSize());
673                     type_proto->set_kind(ProtoClassKind(klass->GetClassFlags()));
674                     type_proto->set_classloader_id(GetObjectId(klass->GetClassLoader().Ptr()));
675                     if (klass->GetSuperClass().Ptr()) {
676                       type_proto->set_superclass_id(
677                         FindOrAppend(&interned_classes,
678                                      reinterpret_cast<uintptr_t>(klass->GetSuperClass().Ptr())));
679                     }
680                     ForInstanceReferenceField(
681                         klass, [klass, &reference_field_ids, &interned_fields](
682                                    art::MemberOffset offset) NO_THREAD_SAFETY_ANALYSIS {
683                           auto art_field = art::ArtField::FindInstanceFieldWithOffset(
684                               klass, offset.Uint32Value());
685                           reference_field_ids->Append(
686                               FindOrAppend(&interned_fields, art_field->PrettyField(true)));
687                         });
688                     type_proto->set_reference_field_id(*reference_field_ids);
689                     reference_field_ids->Reset();
690                   }
691 
692                   art::mirror::Class* klass = obj->GetClass();
693                   uintptr_t class_ptr = reinterpret_cast<uintptr_t>(klass);
                  // We need to synthesize a new type for Class<Foo>, which does not exist
695                   // in the runtime. Otherwise, all the static members of all classes would be
696                   // attributed to java.lang.Class.
697                   if (klass->IsClassClass()) {
698                     CHECK(obj->IsClass());
699                     perfetto::protos::pbzero::HeapGraphType* type_proto =
700                       writer.GetHeapGraph()->add_types();
701                     // All pointers are at least multiples of two, so this way we can make sure
702                     // we are not colliding with a real class.
703                     class_ptr = reinterpret_cast<uintptr_t>(obj) | 1;
704                     auto class_id = FindOrAppend(&interned_classes, class_ptr);
705                     type_proto->set_id(class_id);
706                     type_proto->set_class_name(obj->PrettyTypeOf());
707                     type_proto->set_location_id(FindOrAppend(&interned_locations,
708                           obj->AsClass()->GetLocation()));
709                   }
710 
711                   if (IsIgnored(ignored_types, obj)) {
712                     return;
713                   }
714 
715                   auto class_id = FindOrAppend(&interned_classes, class_ptr);
716 
717                   uint64_t object_id = GetObjectId(obj);
718                   perfetto::protos::pbzero::HeapGraphObject* object_proto =
719                     writer.GetHeapGraph()->add_objects();
720                   if (prev_object_id && prev_object_id < object_id) {
721                     object_proto->set_id_delta(object_id - prev_object_id);
722                   } else {
723                     object_proto->set_id(object_id);
724                   }
725                   prev_object_id = object_id;
726                   object_proto->set_type_id(class_id);
727 
728                   // Arrays / strings are magic and have an instance dependent size.
729                   if (obj->SizeOf() != klass->GetObjectSize())
730                     object_proto->set_self_size(obj->SizeOf());
731 
732                   std::vector<std::pair<std::string, art::mirror::Object*>>
733                       referred_objects;
734                   art::mirror::Object* min_nonnull_ptr = nullptr;
735                   ReferredObjectsFinder objf(&referred_objects, &min_nonnull_ptr);
736 
737                   const bool emit_field_ids =
738                       klass->GetClassFlags() != art::mirror::kClassFlagObjectArray &&
739                       klass->GetClassFlags() != art::mirror::kClassFlagNormal;
740                   if (klass->GetClassFlags() != art::mirror::kClassFlagNormal) {
741                     obj->VisitReferences(objf, art::VoidFunctor());
742                   } else {
743                     for (art::mirror::Class* cls = klass; cls != nullptr;
744                          cls = cls->GetSuperClass().Ptr()) {
745                       ForInstanceReferenceField(
746                           cls, [obj, objf](art::MemberOffset offset) NO_THREAD_SAFETY_ANALYSIS {
747                             objf(art::ObjPtr<art::mirror::Object>(obj), offset,
748                                  /*is_static=*/false);
749                           });
750                     }
751                   }
752 
753                   uint64_t bytes_saved = 0;
754                   uint64_t base_obj_id = GetObjectId(min_nonnull_ptr);
755                   if (base_obj_id) {
756                     // We need to decrement the base for object ids so that we can tell apart
757                     // null references.
758                     base_obj_id--;
759                   }
760                   if (base_obj_id) {
761                     for (auto& p : referred_objects) {
762                       art::mirror::Object*& referred_obj = p.second;
763                       if (!referred_obj || IsIgnored(ignored_types, referred_obj)) {
764                         referred_obj = nullptr;
765                         continue;
766                       }
767                       uint64_t referred_obj_id = GetObjectId(referred_obj);
768                       bytes_saved +=
769                           EncodedSize(referred_obj_id) - EncodedSize(referred_obj_id - base_obj_id);
770                     }
771                   }
772 
773                   // +1 for storing the field id.
774                   if (bytes_saved <= EncodedSize(base_obj_id) + 1) {
775                     // Subtracting the base ptr gains fewer bytes than it takes to store it.
776                     base_obj_id = 0;
777                   }
778 
779                   for (auto& p : referred_objects) {
780                     const std::string& field_name = p.first;
781                     art::mirror::Object* referred_obj = p.second;
782                     if (emit_field_ids) {
783                       reference_field_ids->Append(FindOrAppend(&interned_fields, field_name));
784                     }
785                     uint64_t referred_obj_id = GetObjectId(referred_obj);
786                     if (referred_obj_id) {
787                       referred_obj_id -= base_obj_id;
788                     }
789                     reference_object_ids->Append(referred_obj_id);
790                   }
791                   if (emit_field_ids) {
792                     object_proto->set_reference_field_id(*reference_field_ids);
793                     reference_field_ids->Reset();
794                   }
795                   if (base_obj_id) {
796                     object_proto->set_reference_field_id_base(base_obj_id);
797                   }
798                   object_proto->set_reference_object_id(*reference_object_ids);
799                   reference_object_ids->Reset();
800                 });
801 
802             for (const auto& p : interned_locations) {
803               const std::string& str = p.first;
804               uint64_t id = p.second;
805 
806               perfetto::protos::pbzero::InternedString* location_proto =
807                 writer.GetHeapGraph()->add_location_names();
808               location_proto->set_iid(id);
809               location_proto->set_str(reinterpret_cast<const uint8_t*>(str.c_str()),
810                                   str.size());
811             }
812             for (const auto& p : interned_fields) {
813               const std::string& str = p.first;
814               uint64_t id = p.second;
815 
816               perfetto::protos::pbzero::InternedString* field_proto =
817                 writer.GetHeapGraph()->add_field_names();
818               field_proto->set_iid(id);
819               field_proto->set_str(
820                   reinterpret_cast<const uint8_t*>(str.c_str()), str.size());
821             }
822 
823             writer.Finalize();
824             ctx.Flush([] {
825               {
826                 art::MutexLock lk(JavaHprofDataSource::art_thread(), GetStateMutex());
827                 g_state = State::kEnd;
828                 GetStateCV().Broadcast(JavaHprofDataSource::art_thread());
829               }
830             });
831             // Wait for the Flush that will happen on the Perfetto thread.
832             {
833               art::MutexLock lk(JavaHprofDataSource::art_thread(), GetStateMutex());
834               while (g_state != State::kEnd) {
835                 GetStateCV().Wait(JavaHprofDataSource::art_thread());
836               }
837             }
838             {
839               auto ds = ctx.GetDataSourceLocked();
840               if (ds) {
841                 ds->Finish();
842               } else {
843                 LOG(ERROR) << "datasource timed out (duration_ms + datasource_stop_timeout_ms) "
844                               "before dump finished";
845               }
846             }
847           });
848 
849   LOG(INFO) << "finished dumping heap for " << parent_pid;
850   // Prevent the `atexit` handlers from running. We do not want to call cleanup
851   // functions the parent process has registered.
852   art::FastExit(0);
853 }
854 
855 // The plugin initialization function.
ArtPlugin_Initialize()856 extern "C" bool ArtPlugin_Initialize() {
857   if (art::Runtime::Current() == nullptr) {
858     return false;
859   }
860   art::Thread* self = art::Thread::Current();
861   {
862     art::MutexLock lk(self, GetStateMutex());
863     if (g_state != State::kUninitialized) {
864       LOG(ERROR) << "perfetto_hprof already initialized. state: " << g_state;
865       return false;
866     }
867     g_state = State::kWaitForListener;
868   }
869 
870   if (pipe2(g_signal_pipe_fds, O_CLOEXEC) == -1) {
871     PLOG(ERROR) << "Failed to pipe";
872     return false;
873   }
874 
875   struct sigaction act = {};
876   act.sa_flags = SA_SIGINFO | SA_RESTART;
877   act.sa_sigaction = [](int, siginfo_t* si, void*) {
878     requested_tracing_session_id = si->si_value.sival_int;
879     if (write(g_signal_pipe_fds[1], kByte, sizeof(kByte)) == -1) {
880       PLOG(ERROR) << "Failed to trigger heap dump";
881     }
882   };
883 
884   // TODO(fmayer): We can probably use the SignalCatcher thread here to not
885   // have an idle thread.
886   if (sigaction(kJavaHeapprofdSignal, &act, &g_orig_act) != 0) {
887     close(g_signal_pipe_fds[0]);
888     close(g_signal_pipe_fds[1]);
889     PLOG(ERROR) << "Failed to sigaction";
890     return false;
891   }
892 
893   std::thread th([] {
894     art::Runtime* runtime = art::Runtime::Current();
895     if (!runtime) {
896       LOG(FATAL_WITHOUT_ABORT) << "no runtime in perfetto_hprof_listener";
897       return;
898     }
899     if (!runtime->AttachCurrentThread("perfetto_hprof_listener", /*as_daemon=*/ true,
900                                       runtime->GetSystemThreadGroup(), /*create_peer=*/ false)) {
901       LOG(ERROR) << "failed to attach thread.";
902       {
903         art::MutexLock lk(nullptr, GetStateMutex());
904         g_state = State::kUninitialized;
905         GetStateCV().Broadcast(nullptr);
906       }
907 
908       return;
909     }
910     art::Thread* self = art::Thread::Current();
911     if (!self) {
912       LOG(FATAL_WITHOUT_ABORT) << "no thread in perfetto_hprof_listener";
913       return;
914     }
915     {
916       art::MutexLock lk(self, GetStateMutex());
917       if (g_state == State::kWaitForListener) {
918         g_state = State::kWaitForStart;
919         GetStateCV().Broadcast(self);
920       }
921     }
922     char buf[1];
923     for (;;) {
924       int res;
925       do {
926         res = read(g_signal_pipe_fds[0], buf, sizeof(buf));
927       } while (res == -1 && errno == EINTR);
928 
929       if (res <= 0) {
930         if (res == -1) {
931           PLOG(ERROR) << "failed to read";
932         }
933         close(g_signal_pipe_fds[0]);
934         return;
935       }
936 
937       perfetto_hprof::DumpPerfetto(self);
938     }
939   });
940   th.detach();
941 
942   return true;
943 }
944 
ArtPlugin_Deinitialize()945 extern "C" bool ArtPlugin_Deinitialize() {
946   if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) {
947     PLOG(ERROR) << "failed to reset signal handler";
948     // We cannot close the pipe if the signal handler wasn't unregistered,
949     // to avoid receiving SIGPIPE.
950     return false;
951   }
952   close(g_signal_pipe_fds[1]);
953 
954   art::Thread* self = art::Thread::Current();
955   art::MutexLock lk(self, GetStateMutex());
956   // Wait until after the thread was registered to the runtime. This is so
957   // we do not attempt to register it with the runtime after it had been torn
958   // down (ArtPlugin_Deinitialize gets called in the Runtime dtor).
959   while (g_state == State::kWaitForListener) {
960     GetStateCV().Wait(art::Thread::Current());
961   }
962   g_state = State::kUninitialized;
963   GetStateCV().Broadcast(self);
964   return true;
965 }
966 
967 }  // namespace perfetto_hprof
968 
969 namespace perfetto {
970 
// Presumably provides the out-of-line definitions of the static members that
// Perfetto's data source machinery declares for JavaHprofDataSource — confirm
// against the Perfetto SDK documentation for this macro.
971 PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(perfetto_hprof::JavaHprofDataSource);
972 
973 }  // namespace perfetto
974