1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "profile_saver.h"
18
19 #include <fcntl.h>
20 #include <sys/resource.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23
24 #include "android-base/strings.h"
25
26 #include "art_method-inl.h"
27 #include "base/enums.h"
28 #include "base/logging.h" // For VLOG.
29 #include "base/scoped_arena_containers.h"
30 #include "base/stl_util.h"
31 #include "base/systrace.h"
32 #include "base/time_utils.h"
33 #include "class_table-inl.h"
34 #include "compiler_filter.h"
35 #include "dex/dex_file_loader.h"
36 #include "dex_reference_collection.h"
37 #include "gc/collector_type.h"
38 #include "gc/gc_cause.h"
39 #include "gc/scoped_gc_critical_section.h"
40 #include "jit/jit.h"
41 #include "jit/profiling_info.h"
42 #include "oat_file_manager.h"
43 #include "profile/profile_compilation_info.h"
44 #include "scoped_thread_state_change-inl.h"
45
namespace art {

using Hotness = ProfileCompilationInfo::MethodHotness;

// Singleton saver instance and the pthread handle of its worker thread.
// Both are read and written under Locks::profiler_lock_.
ProfileSaver* ProfileSaver::instance_ = nullptr;
pthread_t ProfileSaver::profiler_pthread_ = 0U;

// The JIT inline caches and the profile format must agree on how many entries
// an individual inline cache can hold.
static_assert(ProfileCompilationInfo::kIndividualInlineCacheSize ==
                  InlineCache::kIndividualCacheSize,
              "InlineCache and ProfileCompilationInfo do not agree on kIndividualCacheSize");

// At what priority to schedule the saver threads. 9 is the lowest foreground priority on device.
static constexpr int kProfileSaverPthreadPriority = 9;
59
// Applies the given nice value to the given thread. On device this resolves
// the pthread to its tid and calls setpriority(); on host it is a no-op.
static void SetProfileSaverThreadPriority(pthread_t thread, int priority) {
#if defined(ART_TARGET_ANDROID)
  int result = setpriority(PRIO_PROCESS, pthread_gettid_np(thread), priority);
  if (result != 0) {
    // Best effort: log and continue, a wrong priority is not fatal.
    LOG(ERROR) << "Failed to setpriority to :" << priority;
  }
#else
  UNUSED(thread);
  UNUSED(priority);
#endif
}
71
// Returns the default scheduling priority a newly created thread would get.
// Used to temporarily restore the saver thread to normal priority (see
// ScopedDefaultPriority). On host this is simply 0.
static int GetDefaultThreadPriority() {
#if defined(ART_TARGET_ANDROID)
  pthread_attr_t attr;
  sched_param param;
  pthread_attr_init(&attr);
  pthread_attr_getschedparam(&attr, &param);
  // Release the attribute object: POSIX requires init/destroy pairing and
  // some implementations allocate state in pthread_attr_init().
  pthread_attr_destroy(&attr);
  return param.sched_priority;
#else
  return 0;
#endif
}
83
// Constructs a saver that tracks the given code paths and persists their
// profile data into output_filename. All statistics counters start at zero.
// The saver may only be constructed when profile saving is enabled.
ProfileSaver::ProfileSaver(const ProfileSaverOptions& options,
                           const std::string& output_filename,
                           jit::JitCodeCache* jit_code_cache,
                           const std::vector<std::string>& code_paths)
    : jit_code_cache_(jit_code_cache),
      shutting_down_(false),
      last_time_ns_saver_woke_up_(0),
      jit_activity_notifications_(0),
      wait_lock_("ProfileSaver wait lock"),
      period_condition_("ProfileSaver period condition", wait_lock_),
      total_bytes_written_(0),
      total_number_of_writes_(0),
      total_number_of_code_cache_queries_(0),
      total_number_of_skipped_writes_(0),
      total_number_of_failed_writes_(0),
      total_ms_of_sleep_(0),
      total_ns_of_work_(0),
      total_number_of_hot_spikes_(0),
      total_number_of_wake_ups_(0),
      options_(options) {
  DCHECK(options_.IsEnabled());
  AddTrackedLocations(output_filename, code_paths);
}
107
~ProfileSaver()108 ProfileSaver::~ProfileSaver() {
109 for (auto& it : profile_cache_) {
110 delete it.second;
111 }
112 }
113
NotifyStartupCompleted()114 void ProfileSaver::NotifyStartupCompleted() {
115 Thread* self = Thread::Current();
116 MutexLock mu(self, *Locks::profiler_lock_);
117 if (instance_ == nullptr || instance_->shutting_down_) {
118 return;
119 }
120 MutexLock mu2(self, instance_->wait_lock_);
121 instance_->period_condition_.Signal(self);
122 }
123
// Main loop of the saver thread. Entered with Locks::profiler_lock_ held
// (acquired by RunProfileSaverThread); drops it immediately, then alternates
// between sleeping and saving profile data until shutdown is requested.
void ProfileSaver::Run() {
  Thread* self = Thread::Current();

  // For thread annotalysis, the setup is more complicated than it should be. Run needs to start
  // under mutex, but should drop it.
  Locks::profiler_lock_->ExclusiveUnlock(self);

  // Fetch the resolved classes for the app images after sleeping for
  // options_.GetSaveResolvedClassesDelayMs().
  // TODO(calin) This only considers the case of the primary profile file.
  // Anything that gets loaded in the same VM will not have their resolved
  // classes saved (unless they started before the initial saving was done).
  {
    MutexLock mu(self, wait_lock_);
    const uint64_t end_time = NanoTime() + MsToNs(options_.GetSaveResolvedClassesDelayMs());
    // Sleep until the delay elapses or startup completes; the wait can be cut
    // short by a signal from NotifyStartupCompleted().
    while (!Runtime::Current()->GetStartupCompleted()) {
      const uint64_t current_time = NanoTime();
      if (current_time >= end_time) {
        break;
      }
      period_condition_.TimedWait(self, NsToMs(end_time - current_time), 0);
    }
    total_ms_of_sleep_ += options_.GetSaveResolvedClassesDelayMs();
  }
  // Tell the runtime that startup is completed if it has not already been notified.
  // TODO: We should use another thread to do this in case the profile saver is not running.
  Runtime::Current()->NotifyStartupCompleted();

  // Cache the classes/methods seen during startup so they can be merged into
  // the on-disk profiles by later saves.
  FetchAndCacheResolvedClassesAndMethods(/*startup=*/ true);

  // When we save without waiting for JIT notifications we use a simple
  // exponential back off policy bounded by max_wait_without_jit.
  uint32_t max_wait_without_jit = options_.GetMinSavePeriodMs() * 16;
  uint64_t cur_wait_without_jit = options_.GetMinSavePeriodMs();
  // Loop for the profiled methods.
  while (!ShuttingDown(self)) {
    uint64_t sleep_start = NanoTime();
    {
      uint64_t sleep_time = 0;
      {
        MutexLock mu(self, wait_lock_);
        if (options_.GetWaitForJitNotificationsToSave()) {
          // Sleep until woken by JIT activity (or shutdown).
          period_condition_.Wait(self);
        } else {
          // No JIT notifications: poll with exponential back off.
          period_condition_.TimedWait(self, cur_wait_without_jit, 0);
          if (cur_wait_without_jit < max_wait_without_jit) {
            cur_wait_without_jit *= 2;
          }
        }
        sleep_time = NanoTime() - sleep_start;
      }
      // Check if the thread was woken up for shutdown.
      if (ShuttingDown(self)) {
        break;
      }
      total_number_of_wake_ups_++;
      // We might have been woken up by a huge number of notifications to guarantee saving.
      // If we didn't meet the minimum saving period go back to sleep (only if missed by
      // a reasonable margin).
      uint64_t min_save_period_ns = MsToNs(options_.GetMinSavePeriodMs());
      while (min_save_period_ns * 0.9 > sleep_time) {
        {
          MutexLock mu(self, wait_lock_);
          period_condition_.TimedWait(self, NsToMs(min_save_period_ns - sleep_time), 0);
          sleep_time = NanoTime() - sleep_start;
        }
        // Check if the thread was woken up for shutdown.
        if (ShuttingDown(self)) {
          break;
        }
        total_number_of_wake_ups_++;
      }
    }
    total_ms_of_sleep_ += NsToMs(NanoTime() - sleep_start);

    if (ShuttingDown(self)) {
      break;
    }

    uint16_t number_of_new_methods = 0;
    uint64_t start_work = NanoTime();
    // Non-forced save: may be skipped if there is not enough new data.
    bool profile_saved_to_disk = ProcessProfilingInfo(/*force_save=*/false, &number_of_new_methods);
    // Update the notification counter based on result. Note that there might be contention on this
    // but we don't care about to be 100% precise.
    if (!profile_saved_to_disk) {
      // If we didn't save to disk it may be because we didn't have enough new methods.
      // Set the jit activity notifications to number_of_new_methods so we can wake up earlier
      // if needed.
      jit_activity_notifications_ = number_of_new_methods;
    }
    total_ns_of_work_ += NanoTime() - start_work;
  }
}
217
NotifyJitActivity()218 void ProfileSaver::NotifyJitActivity() {
219 MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
220 if (instance_ == nullptr || instance_->shutting_down_) {
221 return;
222 }
223 instance_->NotifyJitActivityInternal();
224 }
225
WakeUpSaver()226 void ProfileSaver::WakeUpSaver() {
227 jit_activity_notifications_ = 0;
228 last_time_ns_saver_woke_up_ = NanoTime();
229 period_condition_.Signal(Thread::Current());
230 }
231
NotifyJitActivityInternal()232 void ProfileSaver::NotifyJitActivityInternal() {
233 // Unlikely to overflow but if it happens,
234 // we would have waken up the saver long before that.
235 jit_activity_notifications_++;
236 // Note that we are not as precise as we could be here but we don't want to wake the saver
237 // every time we see a hot method.
238 if (jit_activity_notifications_ > options_.GetMinNotificationBeforeWake()) {
239 MutexLock wait_mutex(Thread::Current(), wait_lock_);
240 if ((NanoTime() - last_time_ns_saver_woke_up_) > MsToNs(options_.GetMinSavePeriodMs())) {
241 WakeUpSaver();
242 } else if (jit_activity_notifications_ > options_.GetMaxNotificationBeforeWake()) {
243 // Make sure to wake up the saver if we see a spike in the number of notifications.
244 // This is a precaution to avoid losing a big number of methods in case
245 // this is a spike with no jit after.
246 total_number_of_hot_spikes_++;
247 WakeUpSaver();
248 }
249 }
250 }
251
252 class ScopedDefaultPriority {
253 public:
ScopedDefaultPriority(pthread_t thread)254 explicit ScopedDefaultPriority(pthread_t thread) : thread_(thread) {
255 SetProfileSaverThreadPriority(thread_, GetDefaultThreadPriority());
256 }
257
~ScopedDefaultPriority()258 ~ScopedDefaultPriority() {
259 SetProfileSaverThreadPriority(thread_, kProfileSaverPthreadPriority);
260 }
261
262 private:
263 const pthread_t thread_;
264 };
265
// GetClassLoadersVisitor takes a snapshot of the class loaders and stores them in the out
// class_loaders argument. Not affected by class unloading since there are no suspend points in
// the caller.
class GetClassLoadersVisitor : public ClassLoaderVisitor {
 public:
  explicit GetClassLoadersVisitor(VariableSizedHandleScope* hs,
                                  std::vector<Handle<mirror::ClassLoader>>* class_loaders)
      : hs_(hs),
        class_loaders_(class_loaders) {}

  void Visit(ObjPtr<mirror::ClassLoader> class_loader)
      REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) override {
    // Wrap each loader in a handle from the caller-provided scope.
    class_loaders_->push_back(hs_->NewHandle(class_loader));
  }

 private:
  VariableSizedHandleScope* const hs_;  // Owns the handles created above.
  std::vector<Handle<mirror::ClassLoader>>* const class_loaders_;  // Out: loader snapshot.
};
285
286 // GetClassesVisitor takes a snapshot of the loaded classes that we may want to visit and stores
287 // them in the out argument. Not affected by class unloading since there are no suspend points in
288 // the caller.
289 class GetClassesVisitor : public ClassVisitor {
290 public:
GetClassesVisitor(bool profile_boot_class_path,ScopedArenaVector<ObjPtr<mirror::Class>> * out)291 explicit GetClassesVisitor(bool profile_boot_class_path,
292 ScopedArenaVector<ObjPtr<mirror::Class>>* out)
293 : profile_boot_class_path_(profile_boot_class_path),
294 out_(out) {}
295
operator ()(ObjPtr<mirror::Class> klass)296 bool operator()(ObjPtr<mirror::Class> klass) override REQUIRES_SHARED(Locks::mutator_lock_) {
297 if (klass->IsProxyClass() ||
298 klass->IsArrayClass() ||
299 klass->IsPrimitive() ||
300 !klass->IsResolved() ||
301 klass->IsErroneousResolved() ||
302 (!profile_boot_class_path_ && klass->GetClassLoader() == nullptr)) {
303 return true;
304 }
305 out_->push_back(klass);
306 return true;
307 }
308
309 private:
310 const bool profile_boot_class_path_;
311 ScopedArenaVector<ObjPtr<mirror::Class>>* const out_;
312 };
313
// Collections of (dex file, index) references gathered while sampling.
// Backed by the scoped arena allocator so they are freed wholesale.
using MethodReferenceCollection = DexReferenceCollection<uint16_t, ScopedArenaAllocatorAdapter>;
using TypeReferenceCollection = DexReferenceCollection<dex::TypeIndex,
                                                       ScopedArenaAllocatorAdapter>;
317
// Iterate over all of the loaded classes and visit each one. For each class, add it to the
// resolved_classes out argument if startup is true.
// Add methods to the hot_methods out argument if the number of samples is greater or equal to
// hot_method_sample_threshold, add it to sampled_methods if it has at least one sample.
static void SampleClassesAndExecutedMethods(pthread_t profiler_pthread,
                                            bool profile_boot_class_path,
                                            ScopedArenaAllocator* allocator,
                                            uint32_t hot_method_sample_threshold,
                                            bool startup,
                                            TypeReferenceCollection* resolved_classes,
                                            MethodReferenceCollection* hot_methods,
                                            MethodReferenceCollection* sampled_methods) {
  Thread* const self = Thread::Current();
  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
  // Restore profile saver thread priority during the GC critical section. This helps prevent
  // priority inversions blocking the GC for long periods of time.
  std::unique_ptr<ScopedDefaultPriority> sdp;
  // Only restore default priority if we are the profile saver thread. Other threads that call this
  // are threads calling Stop and the signal catcher (for SIGUSR1).
  if (pthread_self() == profiler_pthread) {
    sdp.reset(new ScopedDefaultPriority(profiler_pthread));
  }

  // Do ScopedGCCriticalSection before acquiring mutator lock to prevent the GC running and
  // blocking threads during thread root flipping. Since the GC is a background thread, blocking it
  // is not a problem.
  ScopedObjectAccess soa(self);
  gc::ScopedGCCriticalSection sgcs(self,
                                   gc::kGcCauseProfileSaver,
                                   gc::kCollectorTypeCriticalSection);
  VariableSizedHandleScope hs(soa.Self());
  std::vector<Handle<mirror::ClassLoader>> class_loaders;
  if (profile_boot_class_path) {
    // First add the boot class loader since visit classloaders doesn't visit it.
    class_loaders.push_back(hs.NewHandle<mirror::ClassLoader>(nullptr));
  }
  GetClassLoadersVisitor class_loader_visitor(&hs, &class_loaders);
  {
    // Read the class loaders into a temporary array to prevent contention problems on the
    // class_linker_classes_lock.
    ScopedTrace trace2("Get class loaders");
    ReaderMutexLock mu(soa.Self(), *Locks::classlinker_classes_lock_);
    class_linker->VisitClassLoaders(&class_loader_visitor);
  }
  ScopedArenaVector<ObjPtr<mirror::Class>> classes(allocator->Adapter());
  for (Handle<mirror::ClassLoader> class_loader : class_loaders) {
    ClassTable* table = class_linker->ClassTableForClassLoader(class_loader.Get());
    if (table == nullptr) {
      // If the class loader has not loaded any classes, it may have a null table.
      continue;
    }
    GetClassesVisitor get_classes_visitor(profile_boot_class_path, &classes);
    {
      // Collect the classes into a temporary array to prevent lock contention on the class
      // table lock. We want to avoid blocking class loading in other threads as much as
      // possible.
      ScopedTrace trace3("Visiting class table");
      table->Visit(get_classes_visitor);
    }
    for (ObjPtr<mirror::Class> klass : classes) {
      if (startup) {
        // We only record classes for the startup case. This may change in the future.
        resolved_classes->AddReference(&klass->GetDexFile(), klass->GetDexTypeIndex());
      }
      // Visit all of the methods in the class to see which ones were executed.
      for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
        if (!method.IsNative() && !method.IsAbstract()) {
          DCHECK(!method.IsProxyMethod());
          const uint16_t counter = method.GetCounter();
          // Mark startup methods as hot if they have more than hot_method_sample_threshold
          // samples. This means they will get compiled by the compiler driver.
          if (method.GetProfilingInfo(kRuntimePointerSize) != nullptr ||
              method.PreviouslyWarm() ||
              counter >= hot_method_sample_threshold) {
            hot_methods->AddReference(method.GetDexFile(), method.GetDexMethodIndex());
          } else if (counter != 0) {
            sampled_methods->AddReference(method.GetDexFile(), method.GetDexMethodIndex());
          }
        } else {
          // We do not record native methods. Once we AOT-compile the app, all native
          // methods shall have their thunks compiled.
        }
      }
    }
    // Reuse the backing storage for the next class loader's classes.
    classes.clear();
  }
}
405
// Samples the currently loaded classes and executed methods and merges the
// results into the in-memory profile_cache_ entry of each tracked profile
// file. The cached data is flushed to disk later by ProcessProfilingInfo().
void ProfileSaver::FetchAndCacheResolvedClassesAndMethods(bool startup) {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  const uint64_t start_time = NanoTime();

  // Resolve any new registered locations.
  ResolveTrackedLocations();

  Thread* const self = Thread::Current();
  Runtime* const runtime = Runtime::Current();
  // Arena-backed collections: released wholesale when this function returns.
  ArenaStack stack(runtime->GetArenaPool());
  ScopedArenaAllocator allocator(&stack);
  MethodReferenceCollection hot_methods(allocator.Adapter(), allocator.Adapter());
  MethodReferenceCollection sampled_methods(allocator.Adapter(), allocator.Adapter());
  TypeReferenceCollection resolved_classes(allocator.Adapter(), allocator.Adapter());
  const bool is_low_ram = Runtime::Current()->GetHeap()->IsLowMemoryMode();
  pthread_t profiler_pthread;
  {
    MutexLock mu(self, *Locks::profiler_lock_);
    profiler_pthread = profiler_pthread_;
  }
  // Pick the hotness threshold: startup uses the configured sample count;
  // post-startup uses the JIT warm threshold. Without a JIT nothing is marked
  // hot post-startup (threshold stays at max).
  uint32_t hot_method_sample_threshold = std::numeric_limits<uint32_t>::max();
  if (startup) {
    hot_method_sample_threshold = options_.GetHotStartupMethodSamples(is_low_ram);
  } else if (Runtime::Current()->GetJit() != nullptr) {
    hot_method_sample_threshold = Runtime::Current()->GetJit()->WarmMethodThreshold();
  }
  SampleClassesAndExecutedMethods(profiler_pthread,
                                  options_.GetProfileBootClassPath(),
                                  &allocator,
                                  hot_method_sample_threshold,
                                  startup,
                                  &resolved_classes,
                                  &hot_methods,
                                  &sampled_methods);
  MutexLock mu(self, *Locks::profiler_lock_);

  for (const auto& it : tracked_dex_base_locations_) {
    const std::string& filename = it.first;
    // Lazily create the in-memory cached profile for this file.
    auto info_it = profile_cache_.find(filename);
    if (info_it == profile_cache_.end()) {
      info_it = profile_cache_.Put(
          filename,
          new ProfileCompilationInfo(
              Runtime::Current()->GetArenaPool(), options_.GetProfileBootClassPath()));
    }
    ProfileCompilationInfo* cached_info = info_it->second;

    const std::set<std::string>& locations = it.second;
    VLOG(profiler) << "Locations for " << it.first << " " << android::base::Join(locations, ':');

    // Record hot methods whose dex file belongs to this profile's locations.
    for (const auto& pair : hot_methods.GetMap()) {
      const DexFile* const dex_file = pair.first;
      const std::string base_location = DexFileLoader::GetBaseLocation(dex_file->GetLocation());
      const MethodReferenceCollection::IndexVector& indices = pair.second;
      VLOG(profiler) << "Location " << dex_file->GetLocation()
                     << " base_location=" << base_location
                     << " found=" << (locations.find(base_location) != locations.end())
                     << " indices size=" << indices.size();
      if (locations.find(base_location) != locations.end()) {
        uint32_t flags = Hotness::kFlagHot;
        flags |= startup ? Hotness::kFlagStartup : Hotness::kFlagPostStartup;
        cached_info->AddMethodsForDex(
            AnnotateSampleFlags(flags),
            dex_file,
            indices.begin(),
            indices.end(),
            GetProfileSampleAnnotation());
      }
    }
    // Record sampled (but not hot) methods with only the startup/post-startup flag.
    for (const auto& pair : sampled_methods.GetMap()) {
      const DexFile* const dex_file = pair.first;
      const std::string base_location = DexFileLoader::GetBaseLocation(dex_file->GetLocation());
      const MethodReferenceCollection::IndexVector& indices = pair.second;
      VLOG(profiler) << "Location " << base_location
                     << " found=" << (locations.find(base_location) != locations.end())
                     << " indices size=" << indices.size();
      if (locations.find(base_location) != locations.end()) {
        cached_info->AddMethodsForDex(
            AnnotateSampleFlags(startup ? Hotness::kFlagStartup : Hotness::kFlagPostStartup),
            dex_file,
            indices.begin(),
            indices.end(),
            GetProfileSampleAnnotation());
      }
    }
    // Record resolved classes (only collected in the startup pass).
    for (const auto& pair : resolved_classes.GetMap()) {
      const DexFile* const dex_file = pair.first;
      const std::string base_location = DexFileLoader::GetBaseLocation(dex_file->GetLocation());
      if (locations.find(base_location) != locations.end()) {
        const TypeReferenceCollection::IndexVector& classes = pair.second;
        VLOG(profiler) << "Added " << classes.size() << " classes for location "
                       << base_location
                       << " (" << dex_file->GetLocation() << ")";
        cached_info->AddClassesForDex(dex_file,
                                      classes.begin(),
                                      classes.end(),
                                      GetProfileSampleAnnotation());
      } else {
        VLOG(profiler) << "Location not found " << base_location;
      }
    }
  }
  VLOG(profiler) << "Profile saver recorded " << hot_methods.NumReferences() << " hot methods and "
                 << sampled_methods.NumReferences() << " sampled methods with threshold "
                 << hot_method_sample_threshold << " in "
                 << PrettyDuration(NanoTime() - start_time);
}
513
// Merges the JIT code cache's profiled methods and the cached startup data
// into each tracked on-disk profile file and writes the result. Returns true
// if at least one profile file was actually written. If number_of_new_methods
// is non-null it receives the largest per-file count of newly added methods.
// When force_save is true, files are written even with little new data (used
// for a clean shutdown).
bool ProfileSaver::ProcessProfilingInfo(bool force_save, /*out*/uint16_t* number_of_new_methods) {
  ScopedTrace trace(__PRETTY_FUNCTION__);

  // Resolve any new registered locations.
  ResolveTrackedLocations();

  SafeMap<std::string, std::set<std::string>> tracked_locations;
  {
    // Make a copy so that we don't hold the lock while doing I/O.
    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
    tracked_locations = tracked_dex_base_locations_;
  }

  bool profile_file_saved = false;
  if (number_of_new_methods != nullptr) {
    *number_of_new_methods = 0;
  }

  // We only need to do this once, not once per dex location.
  // TODO: Figure out a way to only do it when stuff has changed? It takes 30-50ms.
  FetchAndCacheResolvedClassesAndMethods(/*startup=*/ false);

  for (const auto& it : tracked_locations) {
    if (!force_save && ShuttingDown(Thread::Current())) {
      // The ProfileSaver is in shutdown mode, meaning a stop request was made and
      // we need to exit cleanly (by waiting for the saver thread to finish). Unless
      // we have a request for a forced save, do not do any processing so that we
      // speed up the exit.
      return true;
    }
    const std::string& filename = it.first;
    const std::set<std::string>& locations = it.second;
    VLOG(profiler) << "Tracked filename " << filename << " locations "
                   << android::base::Join(locations, ":");

    // Collect what the JIT has profiled for these dex locations.
    std::vector<ProfileMethodInfo> profile_methods;
    {
      ScopedObjectAccess soa(Thread::Current());
      jit_code_cache_->GetProfiledMethods(locations, profile_methods);
      total_number_of_code_cache_queries_++;
    }
    {
      // Load the existing on-disk profile so new data is merged, not lost.
      ProfileCompilationInfo info(Runtime::Current()->GetArenaPool());
      if (!info.Load(filename, /*clear_if_invalid=*/ true)) {
        LOG(WARNING) << "Could not forcefully load profile " << filename;
        continue;
      }
      if (options_.GetProfileBootClassPath() != info.IsForBootImage()) {
        // If we enabled boot class path profiling but the profile is a regular one,
        // (or the opposite), clear the profile. We do not support cross-version merges.
        LOG(WARNING) << "Adjust profile version: for_boot_classpath="
                     << options_.GetProfileBootClassPath();
        info.ClearDataAndAdjustVersion(options_.GetProfileBootClassPath());
        // Force saving to ensure we persist the new version.
        force_save = true;
      }
      uint64_t last_save_number_of_methods = info.GetNumberOfMethods();
      uint64_t last_save_number_of_classes = info.GetNumberOfResolvedClasses();
      VLOG(profiler) << "last_save_number_of_methods=" << last_save_number_of_methods
                     << " last_save_number_of_classes=" << last_save_number_of_classes
                     << " number of profiled methods=" << profile_methods.size();

      // Try to add the method data. Note this may fail if the profile loaded from disk contains
      // outdated data (e.g. the previous profiled dex files might have been updated).
      // If this happens we clear the profile data and force the save to ensure the file is cleared.
      if (!info.AddMethods(
              profile_methods,
              AnnotateSampleFlags(Hotness::kFlagHot | Hotness::kFlagPostStartup),
              GetProfileSampleAnnotation())) {
        LOG(WARNING) << "Could not add methods to the existing profiler. "
                     << "Clearing the profile data.";
        info.ClearData();
        force_save = true;
      }

      // Merge in the startup classes/methods cached for this file, if any.
      auto profile_cache_it = profile_cache_.find(filename);
      if (profile_cache_it != profile_cache_.end()) {
        if (!info.MergeWith(*(profile_cache_it->second))) {
          LOG(WARNING) << "Could not merge the profile. Clearing the profile data.";
          info.ClearData();
          force_save = true;
        }
      } else if (VLOG_IS_ON(profiler)) {
        LOG(INFO) << "Failed to find cached profile for " << filename;
        for (auto&& pair : profile_cache_) {
          LOG(INFO) << "Cached profile " << pair.first;
        }
      }

      int64_t delta_number_of_methods =
          info.GetNumberOfMethods() - last_save_number_of_methods;
      int64_t delta_number_of_classes =
          info.GetNumberOfResolvedClasses() - last_save_number_of_classes;

      // Skip the write when there is not enough new data (unless forced).
      if (!force_save &&
          delta_number_of_methods < options_.GetMinMethodsToSave() &&
          delta_number_of_classes < options_.GetMinClassesToSave()) {
        VLOG(profiler) << "Not enough information to save to: " << filename
                       << " Number of methods: " << delta_number_of_methods
                       << " Number of classes: " << delta_number_of_classes;
        total_number_of_skipped_writes_++;
        continue;
      }

      if (number_of_new_methods != nullptr) {
        *number_of_new_methods =
            std::max(static_cast<uint16_t>(delta_number_of_methods),
                     *number_of_new_methods);
      }
      uint64_t bytes_written;
      // Force the save. In case the profile data is corrupted or the profile
      // has the wrong version this will "fix" the file to the correct format.
      if (info.Save(filename, &bytes_written)) {
        // We managed to save the profile. Clear the cache stored during startup.
        if (profile_cache_it != profile_cache_.end()) {
          ProfileCompilationInfo *cached_info = profile_cache_it->second;
          profile_cache_.erase(profile_cache_it);
          delete cached_info;
        }
        if (bytes_written > 0) {
          total_number_of_writes_++;
          total_bytes_written_ += bytes_written;
          profile_file_saved = true;
        } else {
          // At this point we could still have avoided the write.
          // We load and merge the data from the file lazily at its first ever
          // save attempt. So, whatever we are trying to save could already be
          // in the file.
          total_number_of_skipped_writes_++;
        }
      } else {
        LOG(WARNING) << "Could not save profiling info to " << filename;
        total_number_of_failed_writes_++;
      }
    }
  }

  // Trim the maps to madvise the pages used for profile info.
  // It is unlikely we will need them again in the near future.
  Runtime::Current()->GetArenaPool()->TrimMaps();

  return profile_file_saved;
}
657
// Entry point of the saver pthread (see Start). Attaches the thread to the
// runtime as a daemon, runs the save loop, and detaches on exit.
void* ProfileSaver::RunProfileSaverThread(void* arg) {
  Runtime* runtime = Runtime::Current();

  bool attached = runtime->AttachCurrentThread("Profile Saver",
                                               /*as_daemon=*/true,
                                               runtime->GetSystemThreadGroup(),
                                               /*create_peer=*/true);
  if (!attached) {
    // Attaching can only fail if the runtime is going away.
    CHECK(runtime->IsShuttingDown(Thread::Current()));
    return nullptr;
  }

  {
    // Take the lock manually (no scoped lock): Run() releases it itself once
    // its setup under the mutex is done.
    Locks::profiler_lock_->ExclusiveLock(Thread::Current());
    CHECK_EQ(reinterpret_cast<ProfileSaver*>(arg), instance_);
    instance_->Run();
  }

  runtime->DetachCurrentThread();
  VLOG(profiler) << "Profile saver shutdown";
  return nullptr;
}
680
ShouldProfileLocation(const std::string & location,bool profile_aot_code)681 static bool ShouldProfileLocation(const std::string& location, bool profile_aot_code) {
682 if (profile_aot_code) {
683 // If we have to profile all the code, irrespective of its compilation state, return true
684 // right away.
685 return true;
686 }
687
688 OatFileManager& oat_manager = Runtime::Current()->GetOatFileManager();
689 const OatFile* oat_file = oat_manager.FindOpenedOatFileFromDexLocation(location);
690 if (oat_file == nullptr) {
691 // This can happen if we fallback to run code directly from the APK.
692 // Profile it with the hope that the background dexopt will get us back into
693 // a good state.
694 VLOG(profiler) << "Asked to profile a location without an oat file:" << location;
695 return true;
696 }
697 CompilerFilter::Filter filter = oat_file->GetCompilerFilter();
698 if ((filter == CompilerFilter::kSpeed) || (filter == CompilerFilter::kEverything)) {
699 VLOG(profiler)
700 << "Skip profiling oat file because it's already speed|everything compiled: "
701 << location << " oat location: " << oat_file->GetLocation();
702 return false;
703 }
704 return true;
705 }
706
// Starts the profile saver singleton for the given output file and code
// paths, creating the saver pthread on first use. Subsequent calls (e.g. for
// secondary dex files) only register additional tracked locations.
void ProfileSaver::Start(const ProfileSaverOptions& options,
                         const std::string& output_filename,
                         jit::JitCodeCache* jit_code_cache,
                         const std::vector<std::string>& code_paths) {
  Runtime* const runtime = Runtime::Current();
  DCHECK(options.IsEnabled());
  DCHECK(runtime->GetJit() != nullptr);
  DCHECK(!output_filename.empty());
  DCHECK(jit_code_cache != nullptr);

  // Filter out code paths that do not need profiling (e.g. already fully
  // compiled ones), unless AOT-code profiling is requested.
  std::vector<std::string> code_paths_to_profile;
  for (const std::string& location : code_paths) {
    if (ShouldProfileLocation(location, options.GetProfileAOTCode())) {
      VLOG(profiler) << "Code path to profile " << location;
      code_paths_to_profile.push_back(location);
    }
  }

  MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
  // Support getting profile samples for the boot class path. This will be used to generate the boot
  // image profile. The intention is to use this code to generate to boot image but not use it in
  // production. b/37966211
  if (options.GetProfileBootClassPath()) {
    std::set<std::string> code_paths_keys;
    for (const std::string& location : code_paths) {
      // Use the profile base key for checking file uniqueness (as it is constructed solely based
      // on the location and ignores other metadata like origin package).
      code_paths_keys.insert(ProfileCompilationInfo::GetProfileDexFileBaseKey(location));
    }
    for (const DexFile* dex_file : runtime->GetClassLinker()->GetBootClassPath()) {
      // Don't check ShouldProfileLocation since the boot class path may be speed compiled.
      const std::string& location = dex_file->GetLocation();
      const std::string key = ProfileCompilationInfo::GetProfileDexFileBaseKey(location);
      VLOG(profiler) << "Registering boot dex file " << location;
      if (code_paths_keys.find(key) != code_paths_keys.end()) {
        LOG(WARNING) << "Boot class path location key conflicts with code path " << location;
      } else if (instance_ == nullptr) {
        // Only add the boot class path once since Start may be called multiple times for secondary
        // dexes.
        // We still do the collision check above. This handles any secondary dexes that conflict
        // with the boot class path dex files.
        code_paths_to_profile.push_back(location);
      }
    }
  }
  if (code_paths_to_profile.empty()) {
    VLOG(profiler) << "No code paths should be profiled.";
    return;
  }

  if (instance_ != nullptr) {
    // If we already have an instance, make sure it uses the same jit_code_cache.
    // This may be called multiple times via Runtime::registerAppInfo (e.g. for
    // apps which share the same runtime).
    DCHECK_EQ(instance_->jit_code_cache_, jit_code_cache);
    // Add the code_paths to the tracked locations.
    instance_->AddTrackedLocations(output_filename, code_paths_to_profile);
    return;
  }

  VLOG(profiler) << "Starting profile saver using output file: " << output_filename
                 << ". Tracking: " << android::base::Join(code_paths_to_profile, ':');

  instance_ = new ProfileSaver(options,
                               output_filename,
                               jit_code_cache,
                               code_paths_to_profile);

  // Create a new thread which does the saving.
  CHECK_PTHREAD_CALL(
      pthread_create,
      (&profiler_pthread_, nullptr, &RunProfileSaverThread, reinterpret_cast<void*>(instance_)),
      "Profile saver thread");

  // Run the saver at a low foreground priority by default.
  SetProfileSaverThreadPriority(profiler_pthread_, kProfileSaverPthreadPriority);
}
783
Stop(bool dump_info)784 void ProfileSaver::Stop(bool dump_info) {
785 ProfileSaver* profile_saver = nullptr;
786 pthread_t profiler_pthread = 0U;
787
788 {
789 MutexLock profiler_mutex(Thread::Current(), *Locks::profiler_lock_);
790 VLOG(profiler) << "Stopping profile saver thread";
791 profile_saver = instance_;
792 profiler_pthread = profiler_pthread_;
793 if (instance_ == nullptr) {
794 DCHECK(false) << "Tried to stop a profile saver which was not started";
795 return;
796 }
797 if (instance_->shutting_down_) {
798 DCHECK(false) << "Tried to stop the profile saver twice";
799 return;
800 }
801 instance_->shutting_down_ = true;
802 }
803
804 {
805 // Wake up the saver thread if it is sleeping to allow for a clean exit.
806 MutexLock wait_mutex(Thread::Current(), profile_saver->wait_lock_);
807 profile_saver->period_condition_.Signal(Thread::Current());
808 }
809
810 // Force save everything before destroying the thread since we want profiler_pthread_ to remain
811 // valid.
812 profile_saver->ProcessProfilingInfo(/*force_save=*/true, /*number_of_new_methods=*/nullptr);
813
814 // Wait for the saver thread to stop.
815 CHECK_PTHREAD_CALL(pthread_join, (profiler_pthread, nullptr), "profile saver thread shutdown");
816
817 {
818 MutexLock profiler_mutex(Thread::Current(), *Locks::profiler_lock_);
819 if (dump_info) {
820 instance_->DumpInfo(LOG_STREAM(INFO));
821 }
822 instance_ = nullptr;
823 profiler_pthread_ = 0U;
824 }
825 delete profile_saver;
826 }
827
ShuttingDown(Thread * self)828 bool ProfileSaver::ShuttingDown(Thread* self) {
829 MutexLock mu(self, *Locks::profiler_lock_);
830 return shutting_down_;
831 }
832
IsStarted()833 bool ProfileSaver::IsStarted() {
834 MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
835 return instance_ != nullptr;
836 }
837
AddTrackedLocationsToMap(const std::string & output_filename,const std::vector<std::string> & code_paths,SafeMap<std::string,std::set<std::string>> * map)838 static void AddTrackedLocationsToMap(const std::string& output_filename,
839 const std::vector<std::string>& code_paths,
840 SafeMap<std::string, std::set<std::string>>* map) {
841 std::vector<std::string> code_paths_and_filenames;
842 // The dex locations are sometimes set to the filename instead of the full path.
843 // So make sure we have both "locations" when tracking what needs to be profiled.
844 // - apps + system server have filenames
845 // - boot classpath elements have full paths
846
847 // TODO(calin, ngeoffray, vmarko) This is an workaround for using filanames as
848 // dex locations - needed to prebuilt with a partial boot image
849 // (commit: c4a924d8c74241057d957d360bf31cd5cd0e4f9c).
850 // We should find a better way which allows us to do the tracking based on full paths.
851 for (const std::string& path : code_paths) {
852 size_t last_sep_index = path.find_last_of('/');
853 if (last_sep_index == path.size() - 1) {
854 // Should not happen, but anyone can register code paths so better be prepared and ignore
855 // such locations.
856 continue;
857 }
858 std::string filename = last_sep_index == std::string::npos
859 ? path
860 : path.substr(last_sep_index + 1);
861
862 code_paths_and_filenames.push_back(path);
863 code_paths_and_filenames.push_back(filename);
864 }
865
866 auto it = map->find(output_filename);
867 if (it == map->end()) {
868 map->Put(
869 output_filename,
870 std::set<std::string>(code_paths_and_filenames.begin(), code_paths_and_filenames.end()));
871 } else {
872 it->second.insert(code_paths_and_filenames.begin(), code_paths_and_filenames.end());
873 }
874 }
875
AddTrackedLocations(const std::string & output_filename,const std::vector<std::string> & code_paths)876 void ProfileSaver::AddTrackedLocations(const std::string& output_filename,
877 const std::vector<std::string>& code_paths) {
878 // Add the code paths to the list of tracked location.
879 AddTrackedLocationsToMap(output_filename, code_paths, &tracked_dex_base_locations_);
880 // The code paths may contain symlinks which could fool the profiler.
881 // If the dex file is compiled with an absolute location but loaded with symlink
882 // the profiler could skip the dex due to location mismatch.
883 // To avoid this, we add the code paths to the temporary cache of 'to_be_resolved'
884 // locations. When the profiler thread executes we will resolve the paths to their
885 // real paths.
886 // Note that we delay taking the realpath to avoid spending more time than needed
887 // when registering location (as it is done during app launch).
888 AddTrackedLocationsToMap(output_filename,
889 code_paths,
890 &tracked_dex_base_locations_to_be_resolved_);
891 }
892
DumpInstanceInfo(std::ostream & os)893 void ProfileSaver::DumpInstanceInfo(std::ostream& os) {
894 MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
895 if (instance_ != nullptr) {
896 instance_->DumpInfo(os);
897 }
898 }
899
DumpInfo(std::ostream & os)900 void ProfileSaver::DumpInfo(std::ostream& os) {
901 os << "ProfileSaver total_bytes_written=" << total_bytes_written_ << '\n'
902 << "ProfileSaver total_number_of_writes=" << total_number_of_writes_ << '\n'
903 << "ProfileSaver total_number_of_code_cache_queries="
904 << total_number_of_code_cache_queries_ << '\n'
905 << "ProfileSaver total_number_of_skipped_writes=" << total_number_of_skipped_writes_ << '\n'
906 << "ProfileSaver total_number_of_failed_writes=" << total_number_of_failed_writes_ << '\n'
907 << "ProfileSaver total_ms_of_sleep=" << total_ms_of_sleep_ << '\n'
908 << "ProfileSaver total_ms_of_work=" << NsToMs(total_ns_of_work_) << '\n'
909 << "ProfileSaver total_number_of_hot_spikes=" << total_number_of_hot_spikes_ << '\n'
910 << "ProfileSaver total_number_of_wake_ups=" << total_number_of_wake_ups_ << '\n';
911 }
912
913
ForceProcessProfiles()914 void ProfileSaver::ForceProcessProfiles() {
915 ProfileSaver* saver = nullptr;
916 {
917 MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
918 saver = instance_;
919 }
920 // TODO(calin): this is not actually thread safe as the instance_ may have been deleted,
921 // but we only use this in testing when we now this won't happen.
922 // Refactor the way we handle the instance so that we don't end up in this situation.
923 if (saver != nullptr) {
924 saver->ProcessProfilingInfo(/*force_save=*/true, /*number_of_new_methods=*/nullptr);
925 }
926 }
927
HasSeenMethod(const std::string & profile,bool hot,MethodReference ref)928 bool ProfileSaver::HasSeenMethod(const std::string& profile, bool hot, MethodReference ref) {
929 MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
930 if (instance_ != nullptr) {
931 ProfileCompilationInfo info(Runtime::Current()->GetArenaPool());
932 if (!info.Load(profile, /*clear_if_invalid=*/false)) {
933 return false;
934 }
935 const ProfileCompilationInfo::MethodHotness hotness = info.GetMethodHotness(ref);
936 return hot ? hotness.IsHot() : hotness.IsInProfile();
937 }
938 return false;
939 }
940
ResolveTrackedLocations()941 void ProfileSaver::ResolveTrackedLocations() {
942 SafeMap<std::string, std::set<std::string>> locations_to_be_resolved;
943 {
944 // Make a copy so that we don't hold the lock while doing I/O.
945 MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
946 locations_to_be_resolved = tracked_dex_base_locations_to_be_resolved_;
947 tracked_dex_base_locations_to_be_resolved_.clear();
948 }
949
950 // Resolve the locations.
951 SafeMap<std::string, std::vector<std::string>> resolved_locations_map;
952 for (const auto& it : locations_to_be_resolved) {
953 const std::string& filename = it.first;
954 const std::set<std::string>& locations = it.second;
955 auto resolved_locations_it = resolved_locations_map.Put(
956 filename,
957 std::vector<std::string>(locations.size()));
958
959 for (const auto& location : locations) {
960 UniqueCPtr<const char[]> location_real(realpath(location.c_str(), nullptr));
961 // Note that it's ok if we cannot get the real path.
962 if (location_real != nullptr) {
963 resolved_locations_it->second.emplace_back(location_real.get());
964 }
965 }
966 }
967
968 // Add the resolved locations to the tracked collection.
969 MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
970 for (const auto& it : resolved_locations_map) {
971 AddTrackedLocationsToMap(it.first, it.second, &tracked_dex_base_locations_);
972 }
973 }
974
GetProfileSampleAnnotation()975 ProfileCompilationInfo::ProfileSampleAnnotation ProfileSaver::GetProfileSampleAnnotation() {
976 // Ideally, this would be cached in the ProfileSaver class, when we start the thread.
977 // However the profile is initialized before the process package name is set and fixing this
978 // would require unnecessary complex synchronizations.
979 std::string package_name = Runtime::Current()->GetProcessPackageName();
980 if (package_name.empty()) {
981 package_name = "unknown";
982 }
983 // We only use annotation for the boot image profiles. Regular apps do not use the extra
984 // metadata and as such there is no need to pay the cost (storage and computational)
985 // that comes with the annotations.
986 return options_.GetProfileBootClassPath()
987 ? ProfileCompilationInfo::ProfileSampleAnnotation(package_name)
988 : ProfileCompilationInfo::ProfileSampleAnnotation::kNone;
989 }
990
AnnotateSampleFlags(uint32_t flags)991 Hotness::Flag ProfileSaver::AnnotateSampleFlags(uint32_t flags) {
992 uint32_t extra_flags = 0;
993 // We only add the extra flags for the boot image profile because individual apps do not use
994 // this information.
995 if (options_.GetProfileBootClassPath()) {
996 extra_flags = Is64BitInstructionSet(Runtime::Current()->GetInstructionSet())
997 ? Hotness::kFlag64bit
998 : Hotness::kFlag32bit;
999 }
1000 return static_cast<Hotness::Flag>(flags | extra_flags);
1001 }
1002
1003 } // namespace art
1004