/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_HEAP_INL_H_
#define ART_RUNTIME_GC_HEAP_INL_H_

#include "heap.h"

#include "allocation_listener.h"
#include "base/quasi_atomic.h"
#include "base/time_utils.h"
#include "gc/accounting/atomic_stack.h"
#include "gc/accounting/card_table-inl.h"
#include "gc/allocation_record.h"
#include "gc/collector/semi_space.h"
#include "gc/space/bump_pointer_space-inl.h"
#include "gc/space/dlmalloc_space-inl.h"
#include "gc/space/large_object_space.h"
#include "gc/space/region_space-inl.h"
#include "gc/space/rosalloc_space-inl.h"
#include "handle_scope-inl.h"
#include "obj_ptr-inl.h"
#include "runtime.h"
#include "thread-inl.h"
#include "verify_object.h"
#include "write_barrier-inl.h"

namespace art {
namespace gc {

template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor>
inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self,
                                                      ObjPtr<mirror::Class> klass,
                                                      size_t byte_count,
                                                      AllocatorType allocator,
                                                      const PreFenceVisitor& pre_fence_visitor) {
  auto no_suspend_pre_fence_visitor =
      [&pre_fence_visitor](auto... x) REQUIRES_SHARED(Locks::mutator_lock_) {
        ScopedAssertNoThreadSuspension sants("No thread suspension during pre-fence visitor");
        pre_fence_visitor(x...);
      };

  if (kIsDebugBuild) {
    CheckPreconditionsForAllocObject(klass, byte_count);
    // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations
    // are done in the runnable state where suspension is expected.
    CHECK_EQ(self->GetState(), kRunnable);
    self->AssertThreadSuspensionIsAllowable();
    self->AssertNoPendingException();
    // Make sure to preserve klass.
    StackHandleScope<1> hs(self);
    HandleWrapperObjPtr<mirror::Class> h = hs.NewHandleWrapper(&klass);
    self->PoisonObjectPointers();
  }
  auto pre_object_allocated = [&]() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(!Roles::uninterruptible_ /* only suspends if kInstrumented */) {
    if constexpr (kInstrumented) {
      AllocationListener* l = alloc_listener_.load(std::memory_order_seq_cst);
      if (UNLIKELY(l != nullptr) && UNLIKELY(l->HasPreAlloc())) {
        StackHandleScope<1> hs(self);
        HandleWrapperObjPtr<mirror::Class> h_klass(hs.NewHandleWrapper(&klass));
        l->PreObjectAllocated(self, h_klass, &byte_count);
      }
    }
  };
  ObjPtr<mirror::Object> obj;
  // Bytes allocated for the (individual) object.
  size_t bytes_allocated;
  size_t usable_size;
  size_t new_num_bytes_allocated = 0;
  bool need_gc = false;
  uint32_t starting_gc_num;  // If need_gc, the GC number at which we observed the need for GC.
  {
    // Bytes allocated that includes bulk thread-local buffer allocations in addition to direct
    // non-TLAB object allocations. Only set for non-thread-local allocation.
    size_t bytes_tl_bulk_allocated = 0u;
    // Do the initial pre-alloc.
    // TODO: Consider what happens if the allocator is switched while suspended here.
    pre_object_allocated();
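
    // Hedged illustration only (an assumption about the listener interface's exact shape; see
    // allocation_listener.h): the pre-alloc event above reaches an AllocationListener registered
    // with the heap, whose hook may itself allocate, suspend, or throw, which is why klass is
    // handle-wrapped and byte_count is passed by pointer. A minimal listener sketch:
    //
    //   class CountingListener : public AllocationListener {
    //     void PreObjectAllocated(Thread* self,
    //                             MutableHandle<mirror::Class> klass,
    //                             size_t* byte_count) override
    //         REQUIRES_SHARED(Locks::mutator_lock_) {
    //       pre_alloc_count_.fetch_add(1, std::memory_order_relaxed);
    //     }
    //     bool HasPreAlloc() const override { return true; }
    //     std::atomic<size_t> pre_alloc_count_{0};
    //   };
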
    // Need to check that we aren't the large object allocator since the large object allocation
    // code path includes this function. If we didn't check we would have an infinite loop.
    if (kCheckLargeObject && UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
      // AllocLargeObject can suspend and will recall PreObjectAllocated if needed.
      obj = AllocLargeObject<kInstrumented, PreFenceVisitor>(self, &klass, byte_count,
                                                             pre_fence_visitor);
      if (obj != nullptr) {
        return obj.Ptr();
      }
      // There should be an OOM exception; since we are retrying, clear it.
      self->ClearException();

      // If the large object allocation failed, try to use the normal spaces (main space,
      // non moving space). This can happen if there is significant virtual address space
      // fragmentation.
      // kInstrumented may be out of date, so recurse without large object checking, rather than
      // continue.
      return AllocObjectWithAllocator<kInstrumented, /*kCheckLargeObject=*/ false>
          (self, klass, byte_count, GetUpdatedAllocator(allocator), pre_fence_visitor);
    }
    ScopedAssertNoThreadSuspension ants("Called PreObjectAllocated, no suspend until alloc");
    if (IsTLABAllocator(allocator)) {
      byte_count = RoundUp(byte_count, space::BumpPointerSpace::kAlignment);
    }
    // If we have a thread local allocation we don't need to update bytes allocated.
    if (IsTLABAllocator(allocator) && byte_count <= self->TlabSize()) {
      obj = self->AllocTlab(byte_count);
      DCHECK(obj != nullptr) << "AllocTlab can't fail";
      obj->SetClass(klass);
      if (kUseBakerReadBarrier) {
        obj->AssertReadBarrierState();
      }
      bytes_allocated = byte_count;
      usable_size = bytes_allocated;
      no_suspend_pre_fence_visitor(obj, usable_size);
      QuasiAtomic::ThreadFenceForConstructor();
    } else if (
        !kInstrumented && allocator == kAllocatorTypeRosAlloc &&
        (obj = rosalloc_space_->AllocThreadLocal(self, byte_count, &bytes_allocated)) != nullptr &&
        LIKELY(obj != nullptr)) {
      DCHECK(!is_running_on_memory_tool_);
      obj->SetClass(klass);
      if (kUseBakerReadBarrier) {
        obj->AssertReadBarrierState();
      }
      usable_size = bytes_allocated;
      no_suspend_pre_fence_visitor(obj, usable_size);
      QuasiAtomic::ThreadFenceForConstructor();
    } else {
      obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated,
                                                &usable_size, &bytes_tl_bulk_allocated);
      if (UNLIKELY(obj == nullptr)) {
        // AllocateInternalWithGc internally re-allows, and can cause, thread suspension. If
        // someone instruments the entrypoints or changes the allocator in a suspend point here,
        // we need to retry the allocation. It will send the pre-alloc event again.
        obj = AllocateInternalWithGc(self,
                                     allocator,
                                     kInstrumented,
                                     byte_count,
                                     &bytes_allocated,
                                     &usable_size,
                                     &bytes_tl_bulk_allocated,
                                     &klass);
        if (obj == nullptr) {
          // The only way that we can get a null return if there is no pending exception is if
          // the allocator or instrumentation changed.
          if (!self->IsExceptionPending()) {
            // Since we are restarting, allow thread suspension.
            ScopedAllowThreadSuspension ats;
            // AllocObject will pick up the new allocator type, and instrumented as true is the
            // safe default.
            return AllocObjectWithAllocator<true>(self,
                                                  klass,
                                                  byte_count,
                                                  GetUpdatedAllocator(allocator),
                                                  pre_fence_visitor);
          }
          return nullptr;
        }
        // Non-null result implies neither instrumentation nor allocator changed.
      }
      DCHECK_GT(bytes_allocated, 0u);
      DCHECK_GT(usable_size, 0u);
      obj->SetClass(klass);
      if (kUseBakerReadBarrier) {
        obj->AssertReadBarrierState();
      }
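
      // Rough model only (assumption; the real implementation and constants live in
      // write_barrier-inl.h and gc/accounting/card_table.h): the field write barrier used just
      // below amounts to dirtying the card covering obj, roughly
      //
      //   card_table_->MarkCard(obj.Ptr());  // ~ card_for(obj) = kCardDirty
      //
      // so a later GC pass can re-scan non-moving objects that may point at moving ones.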
      if (collector::SemiSpace::kUseRememberedSet &&
          UNLIKELY(allocator == kAllocatorTypeNonMoving)) {
        // (Note this if statement will be constant folded away for the fast-path quick entry
        // points.) Because SetClass() has no write barrier, the GC may need a write barrier in
        // the case the object is non movable and points to a recently allocated movable class.
        WriteBarrier::ForFieldWrite(obj, mirror::Object::ClassOffset(), klass);
      }
      no_suspend_pre_fence_visitor(obj, usable_size);
      QuasiAtomic::ThreadFenceForConstructor();
    }
    if (bytes_tl_bulk_allocated > 0) {
      starting_gc_num = GetCurrentGcNum();
      size_t num_bytes_allocated_before =
          num_bytes_allocated_.fetch_add(bytes_tl_bulk_allocated, std::memory_order_relaxed);
      new_num_bytes_allocated = num_bytes_allocated_before + bytes_tl_bulk_allocated;
      // Only trace when we get an increase in the number of bytes allocated. This happens when
      // obtaining a new TLAB and isn't often enough to hurt performance according to golem.
      if (region_space_) {
        // With the CC collector, during a GC cycle, the heap usage increases as
        // there are two copies of evacuated objects. Therefore, add evac-bytes
        // to the heap size. When the GC cycle is not running, evac-bytes
        // are 0, as required.
        TraceHeapSize(new_num_bytes_allocated + region_space_->EvacBytes());
      } else {
        TraceHeapSize(new_num_bytes_allocated);
      }
      // IsGcConcurrent() isn't known at compile time, so we can optimize by not checking it for
      // the BumpPointer or TLAB allocators. This is nice since it allows the entire if statement
      // to be optimized out. And for the other allocators, AllocatorMayHaveConcurrentGC is a
      // constant since the allocator_type should be constant propagated.
      if (AllocatorMayHaveConcurrentGC(allocator) && IsGcConcurrent() &&
          UNLIKELY(ShouldConcurrentGCForJava(new_num_bytes_allocated))) {
        need_gc = true;
      }
      GetMetrics()->TotalBytesAllocated()->Add(bytes_tl_bulk_allocated);
    }
  }
  if (kIsDebugBuild && Runtime::Current()->IsStarted()) {
    CHECK_LE(obj->SizeOf(), usable_size);
  }
  // TODO: Deprecate.
  if (kInstrumented) {
    if (Runtime::Current()->HasStatsEnabled()) {
      RuntimeStats* thread_stats = self->GetStats();
      ++thread_stats->allocated_objects;
      thread_stats->allocated_bytes += bytes_allocated;
      RuntimeStats* global_stats = Runtime::Current()->GetStats();
      ++global_stats->allocated_objects;
      global_stats->allocated_bytes += bytes_allocated;
    }
  } else {
    DCHECK(!Runtime::Current()->HasStatsEnabled());
  }
  if (kInstrumented) {
    if (IsAllocTrackingEnabled()) {
      // allocation_records_ is not null since it never becomes null after allocation tracking is
      // enabled.
      DCHECK(allocation_records_ != nullptr);
      allocation_records_->RecordAllocation(self, &obj, bytes_allocated);
    }
    AllocationListener* l = alloc_listener_.load(std::memory_order_seq_cst);
    if (l != nullptr) {
      // Same as above. We assume that a listener that was once stored will never be deleted.
      // Otherwise we'd have to perform this under a lock.
      l->ObjectAllocated(self, &obj, bytes_allocated);
    }
  } else {
    DCHECK(!IsAllocTrackingEnabled());
  }
  if (AllocatorHasAllocationStack(allocator)) {
    PushOnAllocationStack(self, &obj);
  }
  if (kInstrumented) {
    if (gc_stress_mode_) {
      CheckGcStressMode(self, &obj);
    }
  } else {
    DCHECK(!gc_stress_mode_);
  }
  if (need_gc) {
    // Do this only once thread suspension is allowed again, and we're done with kInstrumented.
    RequestConcurrentGCAndSaveObject(self, /*force_full=*/ false, starting_gc_num, &obj);
  }
  VerifyObject(obj);
  self->VerifyStack();
  return obj.Ptr();
}
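
// Hedged usage sketch, not part of this file: PreFenceVisitor exists so a caller can initialize
// fields that must be visible before the constructor fence publishes the object. The array
// allocation path, for example, uses a visitor along these lines (names here are illustrative):
//
//   class SetLengthVisitor {
//    public:
//     explicit SetLengthVisitor(int32_t length) : length_(length) {}
//     void operator()(ObjPtr<mirror::Object> obj, size_t usable_size ATTRIBUTE_UNUSED) const
//         REQUIRES_SHARED(Locks::mutator_lock_) {
//       // Avoid AsArray: the object is not yet in the live bitmap or allocation stack.
//       ObjPtr<mirror::Array>::DownCast(obj)->SetLength(length_);
//     }
//    private:
//     const int32_t length_;
//   };
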
// The size of a thread-local allocation stack in the number of references.
static constexpr size_t kThreadLocalAllocationStackSize = 128;

inline void Heap::PushOnAllocationStack(Thread* self, ObjPtr<mirror::Object>* obj) {
  if (kUseThreadLocalAllocationStack) {
    if (UNLIKELY(!self->PushOnThreadLocalAllocationStack(obj->Ptr()))) {
      PushOnThreadLocalAllocationStackWithInternalGC(self, obj);
    }
  } else if (UNLIKELY(!allocation_stack_->AtomicPushBack(obj->Ptr()))) {
    PushOnAllocationStackWithInternalGC(self, obj);
  }
}

template <bool kInstrumented, typename PreFenceVisitor>
inline mirror::Object* Heap::AllocLargeObject(Thread* self,
                                              ObjPtr<mirror::Class>* klass,
                                              size_t byte_count,
                                              const PreFenceVisitor& pre_fence_visitor) {
  // Save and restore the class in case it moves.
  StackHandleScope<1> hs(self);
  auto klass_wrapper = hs.NewHandleWrapper(klass);
  mirror::Object* obj = AllocObjectWithAllocator<kInstrumented, /*kCheckLargeObject=*/ false>
      (self, *klass, byte_count, kAllocatorTypeLOS, pre_fence_visitor);
  // Java Heap Profiler check and sample allocation.
  JHPCheckNonTlabSampleAllocation(self, obj, byte_count);
  return obj;
}

template <bool kInstrumented, bool kGrow>
inline mirror::Object* Heap::TryToAllocate(Thread* self,
                                           AllocatorType allocator_type,
                                           size_t alloc_size,
                                           size_t* bytes_allocated,
                                           size_t* usable_size,
                                           size_t* bytes_tl_bulk_allocated) {
  if (allocator_type != kAllocatorTypeRegionTLAB &&
      allocator_type != kAllocatorTypeTLAB &&
      allocator_type != kAllocatorTypeRosAlloc &&
      UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, alloc_size, kGrow))) {
    return nullptr;
  }
  mirror::Object* ret;
  switch (allocator_type) {
    case kAllocatorTypeBumpPointer: {
      DCHECK(bump_pointer_space_ != nullptr);
      alloc_size = RoundUp(alloc_size, space::BumpPointerSpace::kAlignment);
      ret = bump_pointer_space_->AllocNonvirtual(alloc_size);
      if (LIKELY(ret != nullptr)) {
        *bytes_allocated = alloc_size;
        *usable_size = alloc_size;
        *bytes_tl_bulk_allocated = alloc_size;
      }
      break;
    }
    case kAllocatorTypeRosAlloc: {
      if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
        // If running on ASan, we should be using the instrumented path.
        size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size);
        if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
                                               max_bytes_tl_bulk_allocated,
                                               kGrow))) {
          return nullptr;
        }
        ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
                                     bytes_tl_bulk_allocated);
      } else {
        DCHECK(!is_running_on_memory_tool_);
        size_t max_bytes_tl_bulk_allocated =
            rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size);
        if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
                                               max_bytes_tl_bulk_allocated,
                                               kGrow))) {
          return nullptr;
        }
        if (!kInstrumented) {
          DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size));
        }
        ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
                                               bytes_tl_bulk_allocated);
      }
      break;
    }
    case kAllocatorTypeDlMalloc: {
      if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
        // If running on ASan, we should be using the instrumented path.
        ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
                                     bytes_tl_bulk_allocated);
      } else {
        DCHECK(!is_running_on_memory_tool_);
        ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
                                               bytes_tl_bulk_allocated);
      }
      break;
    }
    case kAllocatorTypeNonMoving: {
      ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
                                     bytes_tl_bulk_allocated);
      break;
    }
    case kAllocatorTypeLOS: {
      ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
                                       bytes_tl_bulk_allocated);
      // Note that the bump pointer spaces aren't necessarily next to
      // the other continuous spaces like the non-moving alloc space or
      // the zygote space.
      DCHECK(ret == nullptr || large_object_space_->Contains(ret));
      break;
    }
    case kAllocatorTypeRegion: {
      DCHECK(region_space_ != nullptr);
      alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
      ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
                                                  bytes_tl_bulk_allocated);
      break;
    }
    case kAllocatorTypeTLAB:
      FALLTHROUGH_INTENDED;
    case kAllocatorTypeRegionTLAB: {
      DCHECK_ALIGNED(alloc_size, kObjectAlignment);
      static_assert(space::RegionSpace::kAlignment == space::BumpPointerSpace::kAlignment,
                    "mismatched alignments");
      static_assert(kObjectAlignment == space::BumpPointerSpace::kAlignment,
                    "mismatched alignments");
      if (UNLIKELY(self->TlabSize() < alloc_size)) {
        return AllocWithNewTLAB(self,
                                allocator_type,
                                alloc_size,
                                kGrow,
                                bytes_allocated,
                                usable_size,
                                bytes_tl_bulk_allocated);
      }
      // The allocation can't fail.
      ret = self->AllocTlab(alloc_size);
      DCHECK(ret != nullptr);
      *bytes_allocated = alloc_size;
      *bytes_tl_bulk_allocated = 0;  // Allocated in an existing buffer.
      *usable_size = alloc_size;
      break;
    }
    default: {
      LOG(FATAL) << "Invalid allocator type";
      ret = nullptr;
    }
  }
  return ret;
}
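
// For intuition, a minimal sketch (assumption; the actual bookkeeping lives in Thread): the TLAB
// fast path taken above when TlabSize() >= alloc_size is a pointer bump over a pre-reserved
// buffer, roughly
//
//   mirror::Object* Thread::AllocTlab(size_t bytes) {
//     uint8_t* result = tlab_pos_;      // Current bump pointer.
//     tlab_pos_ += bytes;               // Fits: callers check TlabSize() first.
//     return reinterpret_cast<mirror::Object*>(result);
//   }
//
// which is why that path never fails and why *bytes_tl_bulk_allocated is 0 there: the bulk bytes
// were accounted when the TLAB itself was created.
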
inline bool Heap::ShouldAllocLargeObject(ObjPtr<mirror::Class> c, size_t byte_count) const {
  // We need to have a zygote space or else our newly allocated large object can end up in the
  // Zygote resulting in it being prematurely freed.
  // We can only do this for primitive objects since large objects will not be within the card
  // table range. This also means that we rely on SetClass not dirtying the object's card.
  return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass());
}

inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
                                            size_t alloc_size,
                                            bool grow) {
  size_t old_target = target_footprint_.load(std::memory_order_relaxed);
  while (true) {
    size_t old_allocated = num_bytes_allocated_.load(std::memory_order_relaxed);
    size_t new_footprint = old_allocated + alloc_size;
    // Tests against heap limits are inherently approximate, since multiple allocations may
    // race, and this is not atomic with the allocation.
    if (UNLIKELY(new_footprint <= old_target)) {
      return false;
    } else if (UNLIKELY(new_footprint > growth_limit_)) {
      return true;
    }
    // We are between target_footprint_ and growth_limit_.
    if (AllocatorMayHaveConcurrentGC(allocator_type) && IsGcConcurrent()) {
      return false;
    } else {
      if (grow) {
        if (target_footprint_.compare_exchange_weak(/*inout ref*/ old_target,
                                                    new_footprint,
                                                    std::memory_order_relaxed)) {
          VlogHeapGrowth(old_target, new_footprint, alloc_size);
          return false;
        }  // Else try again with the updated old_target.
      } else {
        return true;
      }
    }
  }
}

inline bool Heap::ShouldConcurrentGCForJava(size_t new_num_bytes_allocated) {
  // For a Java allocation, we only check whether the number of Java allocated bytes exceeds a
  // threshold. By not considering native allocation here, we (a) ensure that Java heap bounds
  // are maintained, and (b) reduce the cost of the check here.
  return new_num_bytes_allocated >= concurrent_start_bytes_;
}

}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_HEAP_INL_H_