/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_HEAP_INL_H_
#define ART_RUNTIME_GC_HEAP_INL_H_

#include "heap.h"

#include "allocation_listener.h"
#include "base/quasi_atomic.h"
#include "base/time_utils.h"
#include "gc/accounting/atomic_stack.h"
#include "gc/accounting/card_table-inl.h"
#include "gc/allocation_record.h"
#include "gc/collector/semi_space.h"
#include "gc/space/bump_pointer_space-inl.h"
#include "gc/space/dlmalloc_space-inl.h"
#include "gc/space/large_object_space.h"
#include "gc/space/region_space-inl.h"
#include "gc/space/rosalloc_space-inl.h"
#include "handle_scope-inl.h"
#include "obj_ptr-inl.h"
#include "runtime.h"
#include "thread-inl.h"
#include "verify_object.h"
#include "write_barrier-inl.h"

namespace art {
namespace gc {

template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor>
inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self,
                                                      ObjPtr<mirror::Class> klass,
                                                      size_t byte_count,
                                                      AllocatorType allocator,
                                                      const PreFenceVisitor& pre_fence_visitor) {
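  // Wrap the caller-supplied pre-fence visitor so that it runs under a no-thread-suspension
  // assertion: the visitor initializes the object before the constructor fence and must not hit
  // a suspend point while doing so.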
  auto no_suspend_pre_fence_visitor =
      [&pre_fence_visitor](auto... x) REQUIRES_SHARED(Locks::mutator_lock_) {
        ScopedAssertNoThreadSuspension sants("No thread suspension during pre-fence visitor");
        pre_fence_visitor(x...);
      };

  if (kIsDebugBuild) {
    CheckPreconditionsForAllocObject(klass, byte_count);
    // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
    // done in the runnable state where suspension is expected.
    CHECK_EQ(self->GetState(), kRunnable);
    self->AssertThreadSuspensionIsAllowable();
    self->AssertNoPendingException();
    // Make sure to preserve klass.
    StackHandleScope<1> hs(self);
    HandleWrapperObjPtr<mirror::Class> h = hs.NewHandleWrapper(&klass);
    self->PoisonObjectPointers();
  }
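  // Pre-allocation hook: when instrumented, notify a registered AllocationListener before the
  // allocation. PreObjectAllocated may suspend the thread and may adjust byte_count, so klass is
  // protected with a handle across the call.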
  auto pre_object_allocated = [&]() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(!Roles::uninterruptible_ /* only suspends if kInstrumented */) {
    if constexpr (kInstrumented) {
      AllocationListener* l = alloc_listener_.load(std::memory_order_seq_cst);
      if (UNLIKELY(l != nullptr) && UNLIKELY(l->HasPreAlloc())) {
        StackHandleScope<1> hs(self);
        HandleWrapperObjPtr<mirror::Class> h_klass(hs.NewHandleWrapper(&klass));
        l->PreObjectAllocated(self, h_klass, &byte_count);
      }
    }
  };
  ObjPtr<mirror::Object> obj;
  // Bytes allocated for the (individual) object.
  size_t bytes_allocated;
  size_t usable_size;
  size_t new_num_bytes_allocated = 0;
  bool need_gc = false;
  uint32_t starting_gc_num;  // If need_gc, the GC number at which we observed the need for GC.
  {
    // Bytes allocated that include bulk thread-local buffer allocations in addition to direct
    // non-TLAB object allocations. Only set for non-thread-local allocations.
    size_t bytes_tl_bulk_allocated = 0u;
    // Send the initial pre-allocation event.
    // TODO: Consider what happens if the allocator is switched while suspended here.
    pre_object_allocated();

    // Need to check that we aren't the large object allocator since the large object allocation
    // code path includes this function. If we didn't check, we would recurse infinitely.
    if (kCheckLargeObject && UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
      // AllocLargeObject can suspend and will recall PreObjectAllocated if needed.
      obj = AllocLargeObject<kInstrumented, PreFenceVisitor>(self, &klass, byte_count,
                                                             pre_fence_visitor);
      if (obj != nullptr) {
        return obj.Ptr();
      }
      // There should be an OOM exception; since we are retrying, clear it.
      self->ClearException();

      // If the large object allocation failed, try to use the normal spaces (main space,
      // non-moving space). This can happen if there is significant virtual address space
      // fragmentation.
      // kInstrumented may be out of date, so recurse without large object checking, rather than
      // continue.
      return AllocObjectWithAllocator</*kInstrumented=*/ true, /*kCheckLargeObject=*/ false>
          (self, klass, byte_count, GetUpdatedAllocator(allocator), pre_fence_visitor);
    }
    ScopedAssertNoThreadSuspension ants("Called PreObjectAllocated, no suspend until alloc");
    if (IsTLABAllocator(allocator)) {
      byte_count = RoundUp(byte_count, space::BumpPointerSpace::kAlignment);
    }
    // If we have a thread-local allocation, we don't need to update bytes allocated.
    if (IsTLABAllocator(allocator) && byte_count <= self->TlabSize()) {
      obj = self->AllocTlab(byte_count);
      DCHECK(obj != nullptr) << "AllocTlab can't fail";
      obj->SetClass(klass);
      if (kUseBakerReadBarrier) {
        obj->AssertReadBarrierState();
      }
      bytes_allocated = byte_count;
      usable_size = bytes_allocated;
      no_suspend_pre_fence_visitor(obj, usable_size);
      QuasiAtomic::ThreadFenceForConstructor();
    } else if (
        !kInstrumented && allocator == kAllocatorTypeRosAlloc &&
        LIKELY((obj = rosalloc_space_->AllocThreadLocal(self, byte_count, &bytes_allocated)) !=
               nullptr)) {
      DCHECK(!is_running_on_memory_tool_);
      obj->SetClass(klass);
      if (kUseBakerReadBarrier) {
        obj->AssertReadBarrierState();
      }
      usable_size = bytes_allocated;
      no_suspend_pre_fence_visitor(obj, usable_size);
      QuasiAtomic::ThreadFenceForConstructor();
    } else {
      obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated,
                                                &usable_size, &bytes_tl_bulk_allocated);
      if (UNLIKELY(obj == nullptr)) {
        // AllocateInternalWithGc internally re-allows, and can cause, thread suspension. If
        // someone instruments the entrypoints or changes the allocator in a suspend point here,
        // we need to retry the allocation. It will send the pre-alloc event again.
        obj = AllocateInternalWithGc(self,
                                     allocator,
                                     kInstrumented,
                                     byte_count,
                                     &bytes_allocated,
                                     &usable_size,
                                     &bytes_tl_bulk_allocated,
                                     &klass);
        if (obj == nullptr) {
          // If there is no pending exception, the only way we can get a null return is if the
          // allocator or instrumentation changed.
          if (!self->IsExceptionPending()) {
            // Since we are restarting, allow thread suspension.
            ScopedAllowThreadSuspension ats;
            // AllocObject will pick up the new allocator type, and instrumented as true is the safe
            // default.
            return AllocObjectWithAllocator</*kInstrumented=*/true>(self,
                                                                    klass,
                                                                    byte_count,
                                                                    GetUpdatedAllocator(allocator),
                                                                    pre_fence_visitor);
          }
          return nullptr;
        }
        // Non-null result implies neither instrumentation nor allocator changed.
      }
      DCHECK_GT(bytes_allocated, 0u);
      DCHECK_GT(usable_size, 0u);
      obj->SetClass(klass);
      if (kUseBakerReadBarrier) {
        obj->AssertReadBarrierState();
      }
      if (collector::SemiSpace::kUseRememberedSet &&
          UNLIKELY(allocator == kAllocatorTypeNonMoving)) {
        // (Note this if statement will be constant folded away for the fast-path quick entry
        // points.) Because SetClass() has no write barrier, the GC may need a write barrier in
        // case the object is non-movable and points to a recently allocated movable class.
        WriteBarrier::ForFieldWrite(obj, mirror::Object::ClassOffset(), klass);
      }
      no_suspend_pre_fence_visitor(obj, usable_size);
      QuasiAtomic::ThreadFenceForConstructor();
    }
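    // bytes_tl_bulk_allocated is non-zero only when new memory was claimed from the heap itself
    // (a fresh TLAB / bulk buffer or a direct non-thread-local allocation). Only then do we
    // update the global byte count, trace the heap size, and decide whether to request a
    // concurrent GC.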
    if (bytes_tl_bulk_allocated > 0) {
      starting_gc_num = GetCurrentGcNum();
      size_t num_bytes_allocated_before =
          num_bytes_allocated_.fetch_add(bytes_tl_bulk_allocated, std::memory_order_relaxed);
      new_num_bytes_allocated = num_bytes_allocated_before + bytes_tl_bulk_allocated;
      // Only trace when we get an increase in the number of bytes allocated. This happens when
      // obtaining a new TLAB and doesn't occur often enough to hurt performance according to
      // golem.
      if (region_space_) {
        // With the CC collector, during a GC cycle, the heap usage increases as
        // there are two copies of evacuated objects. Therefore, add evac-bytes
        // to the heap size. When the GC cycle is not running, evac-bytes
        // are 0, as required.
        TraceHeapSize(new_num_bytes_allocated + region_space_->EvacBytes());
      } else {
        TraceHeapSize(new_num_bytes_allocated);
      }
      // IsGcConcurrent() isn't known at compile time so we can optimize by not checking it for the
      // BumpPointer or TLAB allocators. This is nice since it allows the entire if statement to be
      // optimized out. And for the other allocators, AllocatorMayHaveConcurrentGC is a constant
      // since the allocator_type should be constant propagated.
      if (AllocatorMayHaveConcurrentGC(allocator) && IsGcConcurrent()
          && UNLIKELY(ShouldConcurrentGCForJava(new_num_bytes_allocated))) {
        need_gc = true;
      }
      GetMetrics()->TotalBytesAllocated()->Add(bytes_tl_bulk_allocated);
    }
  }
  if (kIsDebugBuild && Runtime::Current()->IsStarted()) {
    CHECK_LE(obj->SizeOf(), usable_size);
  }
  // TODO: Deprecate.
  if (kInstrumented) {
    if (Runtime::Current()->HasStatsEnabled()) {
      RuntimeStats* thread_stats = self->GetStats();
      ++thread_stats->allocated_objects;
      thread_stats->allocated_bytes += bytes_allocated;
      RuntimeStats* global_stats = Runtime::Current()->GetStats();
      ++global_stats->allocated_objects;
      global_stats->allocated_bytes += bytes_allocated;
    }
  } else {
    DCHECK(!Runtime::Current()->HasStatsEnabled());
  }
  if (kInstrumented) {
    if (IsAllocTrackingEnabled()) {
      // allocation_records_ is not null since it never becomes null after allocation tracking is
      // enabled.
      DCHECK(allocation_records_ != nullptr);
      allocation_records_->RecordAllocation(self, &obj, bytes_allocated);
    }
    AllocationListener* l = alloc_listener_.load(std::memory_order_seq_cst);
    if (l != nullptr) {
      // Same as above. We assume that a listener that was once stored will never be deleted.
      // Otherwise we'd have to perform this under a lock.
      l->ObjectAllocated(self, &obj, bytes_allocated);
    }
  } else {
    DCHECK(!IsAllocTrackingEnabled());
  }
  if (AllocatorHasAllocationStack(allocator)) {
    PushOnAllocationStack(self, &obj);
  }
  if (kInstrumented) {
    if (gc_stress_mode_) {
      CheckGcStressMode(self, &obj);
    }
  } else {
    DCHECK(!gc_stress_mode_);
  }
  if (need_gc) {
    // Do this only once thread suspension is allowed again, and we're done with kInstrumented.
    RequestConcurrentGCAndSaveObject(self, /*force_full=*/ false, starting_gc_num, &obj);
  }
  VerifyObject(obj);
  self->VerifyStack();
  return obj.Ptr();
}

// The size of a thread-local allocation stack in the number of references.
static constexpr size_t kThreadLocalAllocationStackSize = 128;

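// Records a newly allocated object on the allocation stack, which tracks objects allocated since
// the last GC. Uses the thread-local stack when kUseThreadLocalAllocationStack is set, otherwise
// the shared atomic stack; the *WithInternalGC slow paths handle a full stack.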
inline void Heap::PushOnAllocationStack(Thread* self, ObjPtr<mirror::Object>* obj) {
  if (kUseThreadLocalAllocationStack) {
    if (UNLIKELY(!self->PushOnThreadLocalAllocationStack(obj->Ptr()))) {
      PushOnThreadLocalAllocationStackWithInternalGC(self, obj);
    }
  } else if (UNLIKELY(!allocation_stack_->AtomicPushBack(obj->Ptr()))) {
    PushOnAllocationStackWithInternalGC(self, obj);
  }
}

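// Slow path for objects past the large object threshold: allocates in the large object space via
// AllocObjectWithAllocator with kCheckLargeObject disabled, avoiding recursion back into the
// large-object check.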
template <bool kInstrumented, typename PreFenceVisitor>
inline mirror::Object* Heap::AllocLargeObject(Thread* self,
                                              ObjPtr<mirror::Class>* klass,
                                              size_t byte_count,
                                              const PreFenceVisitor& pre_fence_visitor) {
  // Save and restore the class in case it moves.
  StackHandleScope<1> hs(self);
  auto klass_wrapper = hs.NewHandleWrapper(klass);
  mirror::Object* obj = AllocObjectWithAllocator<kInstrumented, false, PreFenceVisitor>
                        (self, *klass, byte_count, kAllocatorTypeLOS, pre_fence_visitor);
  // Java Heap Profiler check and sample allocation.
  JHPCheckNonTlabSampleAllocation(self, obj, byte_count);
  return obj;
}

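// Attempts a single allocation with the given allocator without triggering a GC. Returns null if
// the space cannot satisfy the request or the allocation would exceed the heap limits; the caller
// then falls back to AllocateInternalWithGc.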
template <const bool kInstrumented, const bool kGrow>
inline mirror::Object* Heap::TryToAllocate(Thread* self,
                                           AllocatorType allocator_type,
                                           size_t alloc_size,
                                           size_t* bytes_allocated,
                                           size_t* usable_size,
                                           size_t* bytes_tl_bulk_allocated) {
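  // The TLAB and RosAlloc thread-local allocators may be served from an existing per-thread
  // buffer, and the number of bytes actually taken from the heap in bulk is only known later, so
  // their out-of-memory checks happen in their respective paths (the RosAlloc cases below and
  // AllocWithNewTLAB) rather than here.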
  if (allocator_type != kAllocatorTypeRegionTLAB &&
      allocator_type != kAllocatorTypeTLAB &&
      allocator_type != kAllocatorTypeRosAlloc &&
      UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, alloc_size, kGrow))) {
    return nullptr;
  }
  mirror::Object* ret;
  switch (allocator_type) {
    case kAllocatorTypeBumpPointer: {
      DCHECK(bump_pointer_space_ != nullptr);
      alloc_size = RoundUp(alloc_size, space::BumpPointerSpace::kAlignment);
      ret = bump_pointer_space_->AllocNonvirtual(alloc_size);
      if (LIKELY(ret != nullptr)) {
        *bytes_allocated = alloc_size;
        *usable_size = alloc_size;
        *bytes_tl_bulk_allocated = alloc_size;
      }
      break;
    }
    case kAllocatorTypeRosAlloc: {
      if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
        // If running on ASan, we should be using the instrumented path.
        size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size);
        if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
                                               max_bytes_tl_bulk_allocated,
                                               kGrow))) {
          return nullptr;
        }
        ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
                                     bytes_tl_bulk_allocated);
      } else {
        DCHECK(!is_running_on_memory_tool_);
        size_t max_bytes_tl_bulk_allocated =
            rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size);
        if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
                                               max_bytes_tl_bulk_allocated,
                                               kGrow))) {
          return nullptr;
        }
        if (!kInstrumented) {
          DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size));
        }
        ret = rosalloc_space_->AllocNonvirtual(self,
                                               alloc_size,
                                               bytes_allocated,
                                               usable_size,
                                               bytes_tl_bulk_allocated);
      }
      break;
    }
    case kAllocatorTypeDlMalloc: {
      if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
        // If running on ASan, we should be using the instrumented path.
        ret = dlmalloc_space_->Alloc(self,
                                     alloc_size,
                                     bytes_allocated,
                                     usable_size,
                                     bytes_tl_bulk_allocated);
      } else {
        DCHECK(!is_running_on_memory_tool_);
        ret = dlmalloc_space_->AllocNonvirtual(self,
                                               alloc_size,
                                               bytes_allocated,
                                               usable_size,
                                               bytes_tl_bulk_allocated);
      }
      break;
    }
    case kAllocatorTypeNonMoving: {
      ret = non_moving_space_->Alloc(self,
                                     alloc_size,
                                     bytes_allocated,
                                     usable_size,
                                     bytes_tl_bulk_allocated);
      break;
    }
    case kAllocatorTypeLOS: {
      ret = large_object_space_->Alloc(self,
                                       alloc_size,
                                       bytes_allocated,
                                       usable_size,
                                       bytes_tl_bulk_allocated);
      // Note that the bump pointer spaces aren't necessarily next to
      // the other continuous spaces like the non-moving alloc space or
      // the zygote space.
      DCHECK(ret == nullptr || large_object_space_->Contains(ret));
      break;
    }
    case kAllocatorTypeRegion: {
      DCHECK(region_space_ != nullptr);
      alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
      ret = region_space_->AllocNonvirtual<false>(alloc_size,
                                                  bytes_allocated,
                                                  usable_size,
                                                  bytes_tl_bulk_allocated);
      break;
    }
    case kAllocatorTypeTLAB:
      FALLTHROUGH_INTENDED;
    case kAllocatorTypeRegionTLAB: {
      DCHECK_ALIGNED(alloc_size, kObjectAlignment);
      static_assert(space::RegionSpace::kAlignment == space::BumpPointerSpace::kAlignment,
                    "mismatched alignments");
      static_assert(kObjectAlignment == space::BumpPointerSpace::kAlignment,
                    "mismatched alignments");
      if (UNLIKELY(self->TlabSize() < alloc_size)) {
        return AllocWithNewTLAB(self,
                                allocator_type,
                                alloc_size,
                                kGrow,
                                bytes_allocated,
                                usable_size,
                                bytes_tl_bulk_allocated);
      }
      // The allocation can't fail.
      ret = self->AllocTlab(alloc_size);
      DCHECK(ret != nullptr);
      *bytes_allocated = alloc_size;
      *bytes_tl_bulk_allocated = 0;  // Allocated in an existing buffer.
      *usable_size = alloc_size;
      break;
    }
    default: {
      LOG(FATAL) << "Invalid allocator type";
      ret = nullptr;
    }
  }
  return ret;
}

inline bool Heap::ShouldAllocLargeObject(ObjPtr<mirror::Class> c, size_t byte_count) const {
  // We need to have a zygote space or else our newly allocated large object can end up in the
  // Zygote resulting in it being prematurely freed.
  // We can only do this for primitive objects since large objects will not be within the card table
  // range. This also means that we rely on SetClass not dirtying the object's card.
  return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass());
}

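// Returns true if allocating alloc_size additional bytes would exceed the heap limits. Crossing
// target_footprint_ alone is tolerated when a concurrent GC can catch up or, if grow is set, by
// growing the target footprint via CAS; exceeding growth_limit_ always reports out of memory.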
inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
                                            size_t alloc_size,
                                            bool grow) {
  size_t old_target = target_footprint_.load(std::memory_order_relaxed);
  while (true) {
    size_t old_allocated = num_bytes_allocated_.load(std::memory_order_relaxed);
    size_t new_footprint = old_allocated + alloc_size;
    // Tests against heap limits are inherently approximate, since multiple allocations may
    // race, and this is not atomic with the allocation.
    if (UNLIKELY(new_footprint <= old_target)) {
      return false;
    } else if (UNLIKELY(new_footprint > growth_limit_)) {
      return true;
    }
    // We are between target_footprint_ and growth_limit_.
    if (AllocatorMayHaveConcurrentGC(allocator_type) && IsGcConcurrent()) {
      return false;
    } else {
      if (grow) {
        if (target_footprint_.compare_exchange_weak(/*inout ref*/old_target, new_footprint,
                                                    std::memory_order_relaxed)) {
          VlogHeapGrowth(old_target, new_footprint, alloc_size);
          return false;
        }  // else try again.
      } else {
        return true;
      }
    }
  }
}

inline bool Heap::ShouldConcurrentGCForJava(size_t new_num_bytes_allocated) {
  // For a Java allocation, we only check whether the number of Java allocated bytes exceeds a
  // threshold. By not considering native allocation here, we (a) ensure that Java heap bounds are
  // maintained, and (b) reduce the cost of the check here.
  return new_num_bytes_allocated >= concurrent_start_bytes_;
}

}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_HEAP_INL_H_