1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/sampling_heap_profiler/sampling_heap_profiler.h"
6 
7 #include <algorithm>
8 #include <cmath>
9 #include <utility>
10 
11 #include "base/allocator/allocator_shim.h"
12 #include "base/allocator/buildflags.h"
13 #include "base/allocator/partition_allocator/partition_alloc.h"
14 #include "base/atomicops.h"
15 #include "base/debug/stack_trace.h"
16 #include "base/macros.h"
17 #include "base/no_destructor.h"
18 #include "base/partition_alloc_buildflags.h"
19 #include "base/rand_util.h"
20 #include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
21 #include "base/threading/thread_local_storage.h"
22 #include "build/build_config.h"
23 
24 #if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
25     defined(OFFICIAL_BUILD)
26 #include "base/trace_event/cfi_backtrace_android.h"
27 #endif
28 
29 namespace base {
30 
31 using base::allocator::AllocatorDispatch;
32 using base::subtle::Atomic32;
33 using base::subtle::AtomicWord;
34 
35 namespace {
36 
37 // Control how many top frames to skip when recording call stack.
38 // These frames correspond to the profiler own frames.
39 const uint32_t kSkipBaseAllocatorFrames = 2;
40 
41 const size_t kDefaultSamplingIntervalBytes = 128 * 1024;
42 
43 // Controls if sample intervals should not be randomized. Used for testing.
44 bool g_deterministic;
45 
46 // A positive value if profiling is running, otherwise it's zero.
47 Atomic32 g_running;
48 
49 // Pointer to the current |LockFreeAddressHashSet|.
50 AtomicWord g_sampled_addresses_set;
51 
52 // Sampling interval parameter, the mean value for intervals between samples.
53 AtomicWord g_sampling_interval = kDefaultSamplingIntervalBytes;
54 
55 void (*g_hooks_install_callback)();
56 Atomic32 g_hooks_installed;
57 
AllocFn(const AllocatorDispatch * self,size_t size,void * context)58 void* AllocFn(const AllocatorDispatch* self, size_t size, void* context) {
59   void* address = self->next->alloc_function(self->next, size, context);
60   SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
61   return address;
62 }
63 
AllocZeroInitializedFn(const AllocatorDispatch * self,size_t n,size_t size,void * context)64 void* AllocZeroInitializedFn(const AllocatorDispatch* self,
65                              size_t n,
66                              size_t size,
67                              void* context) {
68   void* address =
69       self->next->alloc_zero_initialized_function(self->next, n, size, context);
70   SamplingHeapProfiler::RecordAlloc(address, n * size,
71                                     kSkipBaseAllocatorFrames);
72   return address;
73 }
74 
AllocAlignedFn(const AllocatorDispatch * self,size_t alignment,size_t size,void * context)75 void* AllocAlignedFn(const AllocatorDispatch* self,
76                      size_t alignment,
77                      size_t size,
78                      void* context) {
79   void* address =
80       self->next->alloc_aligned_function(self->next, alignment, size, context);
81   SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
82   return address;
83 }
84 
ReallocFn(const AllocatorDispatch * self,void * address,size_t size,void * context)85 void* ReallocFn(const AllocatorDispatch* self,
86                 void* address,
87                 size_t size,
88                 void* context) {
89   // Note: size == 0 actually performs free.
90   SamplingHeapProfiler::RecordFree(address);
91   address = self->next->realloc_function(self->next, address, size, context);
92   SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
93   return address;
94 }
95 
FreeFn(const AllocatorDispatch * self,void * address,void * context)96 void FreeFn(const AllocatorDispatch* self, void* address, void* context) {
97   SamplingHeapProfiler::RecordFree(address);
98   self->next->free_function(self->next, address, context);
99 }
100 
GetSizeEstimateFn(const AllocatorDispatch * self,void * address,void * context)101 size_t GetSizeEstimateFn(const AllocatorDispatch* self,
102                          void* address,
103                          void* context) {
104   return self->next->get_size_estimate_function(self->next, address, context);
105 }
106 
BatchMallocFn(const AllocatorDispatch * self,size_t size,void ** results,unsigned num_requested,void * context)107 unsigned BatchMallocFn(const AllocatorDispatch* self,
108                        size_t size,
109                        void** results,
110                        unsigned num_requested,
111                        void* context) {
112   unsigned num_allocated = self->next->batch_malloc_function(
113       self->next, size, results, num_requested, context);
114   for (unsigned i = 0; i < num_allocated; ++i) {
115     SamplingHeapProfiler::RecordAlloc(results[i], size,
116                                       kSkipBaseAllocatorFrames);
117   }
118   return num_allocated;
119 }
120 
BatchFreeFn(const AllocatorDispatch * self,void ** to_be_freed,unsigned num_to_be_freed,void * context)121 void BatchFreeFn(const AllocatorDispatch* self,
122                  void** to_be_freed,
123                  unsigned num_to_be_freed,
124                  void* context) {
125   for (unsigned i = 0; i < num_to_be_freed; ++i)
126     SamplingHeapProfiler::RecordFree(to_be_freed[i]);
127   self->next->batch_free_function(self->next, to_be_freed, num_to_be_freed,
128                                   context);
129 }
130 
FreeDefiniteSizeFn(const AllocatorDispatch * self,void * address,size_t size,void * context)131 void FreeDefiniteSizeFn(const AllocatorDispatch* self,
132                         void* address,
133                         size_t size,
134                         void* context) {
135   SamplingHeapProfiler::RecordFree(address);
136   self->next->free_definite_size_function(self->next, address, size, context);
137 }
138 
139 AllocatorDispatch g_allocator_dispatch = {&AllocFn,
140                                           &AllocZeroInitializedFn,
141                                           &AllocAlignedFn,
142                                           &ReallocFn,
143                                           &FreeFn,
144                                           &GetSizeEstimateFn,
145                                           &BatchMallocFn,
146                                           &BatchFreeFn,
147                                           &FreeDefiniteSizeFn,
148                                           nullptr};
149 
150 #if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
151 
PartitionAllocHook(void * address,size_t size,const char *)152 void PartitionAllocHook(void* address, size_t size, const char*) {
153   SamplingHeapProfiler::RecordAlloc(address, size);
154 }
155 
PartitionFreeHook(void * address)156 void PartitionFreeHook(void* address) {
157   SamplingHeapProfiler::RecordFree(address);
158 }
159 
160 #endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
161 
AccumulatedBytesTLS()162 ThreadLocalStorage::Slot& AccumulatedBytesTLS() {
163   static base::NoDestructor<base::ThreadLocalStorage::Slot>
164       accumulated_bytes_tls;
165   return *accumulated_bytes_tls;
166 }
167 
168 }  // namespace
169 
Sample(size_t size,size_t total,uint32_t ordinal)170 SamplingHeapProfiler::Sample::Sample(size_t size,
171                                      size_t total,
172                                      uint32_t ordinal)
173     : size(size), total(total), ordinal(ordinal) {}
174 
175 SamplingHeapProfiler::Sample::Sample(const Sample&) = default;
176 
177 SamplingHeapProfiler::Sample::~Sample() = default;
178 
179 SamplingHeapProfiler* SamplingHeapProfiler::instance_;
180 
SamplingHeapProfiler()181 SamplingHeapProfiler::SamplingHeapProfiler() {
182   instance_ = this;
183   auto sampled_addresses = std::make_unique<LockFreeAddressHashSet>(64);
184   base::subtle::NoBarrier_Store(
185       &g_sampled_addresses_set,
186       reinterpret_cast<AtomicWord>(sampled_addresses.get()));
187   sampled_addresses_stack_.push(std::move(sampled_addresses));
188 }
189 
190 // static
InitTLSSlot()191 void SamplingHeapProfiler::InitTLSSlot() {
192   // Preallocate the TLS slot early, so it can't cause reentracy issues
193   // when sampling is started.
194   ignore_result(AccumulatedBytesTLS().Get());
195 }
196 
197 // static
InstallAllocatorHooksOnce()198 void SamplingHeapProfiler::InstallAllocatorHooksOnce() {
199   static bool hook_installed = InstallAllocatorHooks();
200   ignore_result(hook_installed);
201 }
202 
203 // static
InstallAllocatorHooks()204 bool SamplingHeapProfiler::InstallAllocatorHooks() {
205 #if BUILDFLAG(USE_ALLOCATOR_SHIM)
206   base::allocator::InsertAllocatorDispatch(&g_allocator_dispatch);
207 #else
208   ignore_result(g_allocator_dispatch);
209   DLOG(WARNING)
210       << "base::allocator shims are not available for memory sampling.";
211 #endif  // BUILDFLAG(USE_ALLOCATOR_SHIM)
212 
213 #if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
214   base::PartitionAllocHooks::SetAllocationHook(&PartitionAllocHook);
215   base::PartitionAllocHooks::SetFreeHook(&PartitionFreeHook);
216 #endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
217 
218   int32_t hooks_install_callback_has_been_set =
219       base::subtle::Acquire_CompareAndSwap(&g_hooks_installed, 0, 1);
220   if (hooks_install_callback_has_been_set)
221     g_hooks_install_callback();
222 
223   return true;
224 }
225 
226 // static
SetHooksInstallCallback(void (* hooks_install_callback)())227 void SamplingHeapProfiler::SetHooksInstallCallback(
228     void (*hooks_install_callback)()) {
229   CHECK(!g_hooks_install_callback && hooks_install_callback);
230   g_hooks_install_callback = hooks_install_callback;
231 
232   int32_t profiler_has_already_been_initialized =
233       base::subtle::Release_CompareAndSwap(&g_hooks_installed, 0, 1);
234   if (profiler_has_already_been_initialized)
235     g_hooks_install_callback();
236 }
237 
Start()238 uint32_t SamplingHeapProfiler::Start() {
239 #if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
240     defined(OFFICIAL_BUILD)
241   if (!base::trace_event::CFIBacktraceAndroid::GetInitializedInstance()
242            ->can_unwind_stack_frames()) {
243     LOG(WARNING) << "Sampling heap profiler: Stack unwinding is not available.";
244     return 0;
245   }
246 #endif
247   InstallAllocatorHooksOnce();
248   base::subtle::Barrier_AtomicIncrement(&g_running, 1);
249   return last_sample_ordinal_;
250 }
251 
Stop()252 void SamplingHeapProfiler::Stop() {
253   AtomicWord count = base::subtle::Barrier_AtomicIncrement(&g_running, -1);
254   CHECK_GE(count, 0);
255 }
256 
SetSamplingInterval(size_t sampling_interval)257 void SamplingHeapProfiler::SetSamplingInterval(size_t sampling_interval) {
258   // TODO(alph): Reset the sample being collected if running.
259   base::subtle::Release_Store(&g_sampling_interval,
260                               static_cast<AtomicWord>(sampling_interval));
261 }
262 
263 // static
GetNextSampleInterval(size_t interval)264 size_t SamplingHeapProfiler::GetNextSampleInterval(size_t interval) {
265   if (UNLIKELY(g_deterministic))
266     return interval;
267 
268   // We sample with a Poisson process, with constant average sampling
269   // interval. This follows the exponential probability distribution with
270   // parameter λ = 1/interval where |interval| is the average number of bytes
271   // between samples.
272   // Let u be a uniformly distributed random number between 0 and 1, then
273   // next_sample = -ln(u) / λ
274   double uniform = base::RandDouble();
275   double value = -log(uniform) * interval;
276   size_t min_value = sizeof(intptr_t);
277   // We limit the upper bound of a sample interval to make sure we don't have
278   // huge gaps in the sampling stream. Probability of the upper bound gets hit
279   // is exp(-20) ~ 2e-9, so it should not skew the distibution.
280   size_t max_value = interval * 20;
281   if (UNLIKELY(value < min_value))
282     return min_value;
283   if (UNLIKELY(value > max_value))
284     return max_value;
285   return static_cast<size_t>(value);
286 }
287 
288 // static
RecordAlloc(void * address,size_t size,uint32_t skip_frames)289 void SamplingHeapProfiler::RecordAlloc(void* address,
290                                        size_t size,
291                                        uint32_t skip_frames) {
292   if (UNLIKELY(!base::subtle::NoBarrier_Load(&g_running)))
293     return;
294   if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
295     return;
296 
297   // TODO(alph): On MacOS it may call the hook several times for a single
298   // allocation. Handle the case.
299 
300   intptr_t accumulated_bytes =
301       reinterpret_cast<intptr_t>(AccumulatedBytesTLS().Get());
302   accumulated_bytes += size;
303   if (LIKELY(accumulated_bytes < 0)) {
304     AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));
305     return;
306   }
307 
308   size_t mean_interval = base::subtle::NoBarrier_Load(&g_sampling_interval);
309   size_t samples = accumulated_bytes / mean_interval;
310   accumulated_bytes %= mean_interval;
311 
312   do {
313     accumulated_bytes -= GetNextSampleInterval(mean_interval);
314     ++samples;
315   } while (accumulated_bytes >= 0);
316 
317   AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));
318 
319   instance_->DoRecordAlloc(samples * mean_interval, size, address, skip_frames);
320 }
321 
RecordStackTrace(Sample * sample,uint32_t skip_frames)322 void SamplingHeapProfiler::RecordStackTrace(Sample* sample,
323                                             uint32_t skip_frames) {
324 #if !defined(OS_NACL)
325   constexpr uint32_t kMaxStackEntries = 256;
326   constexpr uint32_t kSkipProfilerOwnFrames = 2;
327   skip_frames += kSkipProfilerOwnFrames;
328 #if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
329     defined(OFFICIAL_BUILD)
330   const void* frames[kMaxStackEntries];
331   size_t frame_count =
332       base::trace_event::CFIBacktraceAndroid::GetInitializedInstance()->Unwind(
333           frames, kMaxStackEntries);
334 #elif BUILDFLAG(CAN_UNWIND_WITH_FRAME_POINTERS)
335   const void* frames[kMaxStackEntries];
336   size_t frame_count = base::debug::TraceStackFramePointers(
337       frames, kMaxStackEntries, skip_frames);
338   skip_frames = 0;
339 #else
340   // Fall-back to capturing the stack with base::debug::StackTrace,
341   // which is likely slower, but more reliable.
342   base::debug::StackTrace stack_trace(kMaxStackEntries);
343   size_t frame_count = 0;
344   const void* const* frames = stack_trace.Addresses(&frame_count);
345 #endif
346 
347   sample->stack.insert(
348       sample->stack.end(), const_cast<void**>(&frames[skip_frames]),
349       const_cast<void**>(&frames[std::max<size_t>(frame_count, skip_frames)]));
350 #endif
351 }
352 
DoRecordAlloc(size_t total_allocated,size_t size,void * address,uint32_t skip_frames)353 void SamplingHeapProfiler::DoRecordAlloc(size_t total_allocated,
354                                          size_t size,
355                                          void* address,
356                                          uint32_t skip_frames) {
357   if (entered_.Get())
358     return;
359   entered_.Set(true);
360   {
361     base::AutoLock lock(mutex_);
362     Sample sample(size, total_allocated, ++last_sample_ordinal_);
363     RecordStackTrace(&sample, skip_frames);
364     for (auto* observer : observers_)
365       observer->SampleAdded(sample.ordinal, size, total_allocated);
366     samples_.emplace(address, std::move(sample));
367     // TODO(alph): Sometimes RecordAlloc is called twice in a row without
368     // a RecordFree in between. Investigate it.
369     if (!sampled_addresses_set().Contains(address))
370       sampled_addresses_set().Insert(address);
371     BalanceAddressesHashSet();
372   }
373   entered_.Set(false);
374 }
375 
376 // static
RecordFree(void * address)377 void SamplingHeapProfiler::RecordFree(void* address) {
378   if (UNLIKELY(address == nullptr))
379     return;
380   if (UNLIKELY(sampled_addresses_set().Contains(address)))
381     instance_->DoRecordFree(address);
382 }
383 
DoRecordFree(void * address)384 void SamplingHeapProfiler::DoRecordFree(void* address) {
385   if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
386     return;
387   if (entered_.Get())
388     return;
389   entered_.Set(true);
390   {
391     base::AutoLock lock(mutex_);
392     auto it = samples_.find(address);
393     CHECK(it != samples_.end());
394     for (auto* observer : observers_)
395       observer->SampleRemoved(it->second.ordinal);
396     samples_.erase(it);
397     sampled_addresses_set().Remove(address);
398   }
399   entered_.Set(false);
400 }
401 
BalanceAddressesHashSet()402 void SamplingHeapProfiler::BalanceAddressesHashSet() {
403   // Check if the load_factor of the current addresses hash set becomes higher
404   // than 1, allocate a new twice larger one, copy all the data,
405   // and switch to using it.
406   // During the copy process no other writes are made to both sets
407   // as it's behind the lock.
408   // All the readers continue to use the old one until the atomic switch
409   // process takes place.
410   LockFreeAddressHashSet& current_set = sampled_addresses_set();
411   if (current_set.load_factor() < 1)
412     return;
413   auto new_set =
414       std::make_unique<LockFreeAddressHashSet>(current_set.buckets_count() * 2);
415   new_set->Copy(current_set);
416   // Atomically switch all the new readers to the new set.
417   base::subtle::Release_Store(&g_sampled_addresses_set,
418                               reinterpret_cast<AtomicWord>(new_set.get()));
419   // We still have to keep all the old maps alive to resolve the theoretical
420   // race with readers in |RecordFree| that have already obtained the map,
421   // but haven't yet managed to access it.
422   sampled_addresses_stack_.push(std::move(new_set));
423 }
424 
425 // static
sampled_addresses_set()426 LockFreeAddressHashSet& SamplingHeapProfiler::sampled_addresses_set() {
427   return *reinterpret_cast<LockFreeAddressHashSet*>(
428       base::subtle::NoBarrier_Load(&g_sampled_addresses_set));
429 }
430 
431 // static
GetInstance()432 SamplingHeapProfiler* SamplingHeapProfiler::GetInstance() {
433   static base::NoDestructor<SamplingHeapProfiler> instance;
434   return instance.get();
435 }
436 
437 // static
SuppressRandomnessForTest(bool suppress)438 void SamplingHeapProfiler::SuppressRandomnessForTest(bool suppress) {
439   g_deterministic = suppress;
440 }
441 
AddSamplesObserver(SamplesObserver * observer)442 void SamplingHeapProfiler::AddSamplesObserver(SamplesObserver* observer) {
443   CHECK(!entered_.Get());
444   entered_.Set(true);
445   {
446     base::AutoLock lock(mutex_);
447     observers_.push_back(observer);
448   }
449   entered_.Set(false);
450 }
451 
RemoveSamplesObserver(SamplesObserver * observer)452 void SamplingHeapProfiler::RemoveSamplesObserver(SamplesObserver* observer) {
453   CHECK(!entered_.Get());
454   entered_.Set(true);
455   {
456     base::AutoLock lock(mutex_);
457     auto it = std::find(observers_.begin(), observers_.end(), observer);
458     CHECK(it != observers_.end());
459     observers_.erase(it);
460   }
461   entered_.Set(false);
462 }
463 
GetSamples(uint32_t profile_id)464 std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
465     uint32_t profile_id) {
466   CHECK(!entered_.Get());
467   entered_.Set(true);
468   std::vector<Sample> samples;
469   {
470     base::AutoLock lock(mutex_);
471     for (auto& it : samples_) {
472       Sample& sample = it.second;
473       if (sample.ordinal > profile_id)
474         samples.push_back(sample);
475     }
476   }
477   entered_.Set(false);
478   return samples;
479 }
480 
481 }  // namespace base
482