/*
 * Copyright 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
#define ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_

#include <signal.h>

#include <map>
#include <memory>
#include <unordered_map>
#include <unordered_set>

#include "barrier.h"
#include "base/atomic.h"
#include "base/gc_visited_arena_pool.h"
#include "base/macros.h"
#include "base/mutex.h"
#include "garbage_collector.h"
#include "gc/accounting/atomic_stack.h"
#include "gc/accounting/bitmap-inl.h"
#include "gc/accounting/heap_bitmap.h"
#include "gc_root.h"
#include "immune_spaces.h"
#include "offsets.h"

namespace art HIDDEN {

EXPORT bool KernelSupportsUffd();

namespace mirror {
class DexCache;
}  // namespace mirror

namespace gc {

class Heap;

namespace space {
class BumpPointerSpace;
}  // namespace space

namespace collector {
class MarkCompact final : public GarbageCollector {
 public:
  using SigbusCounterType = uint32_t;

  static constexpr size_t kAlignment = kObjectAlignment;
  static constexpr int kCopyMode = -1;
  static constexpr int kMinorFaultMode = -2;
  // Fake file descriptor for fall back mode (when uffd isn't available)
  static constexpr int kFallbackMode = -3;
  static constexpr int kFdSharedAnon = -1;
  static constexpr int kFdUnused = -2;

  // Bitmask for the compaction-done bit in the sigbus_in_progress_count_.
  static constexpr SigbusCounterType kSigbusCounterCompactionDoneMask =
      1u << (BitSizeOf<SigbusCounterType>() - 1);

  explicit MarkCompact(Heap* heap);

  ~MarkCompact() {}

  void RunPhases() override REQUIRES(!Locks::mutator_lock_, !lock_);

  void ClampGrowthLimit(size_t new_capacity) REQUIRES(Locks::heap_bitmap_lock_);
  // Updated before (or in) pre-compaction pause and is accessed only in the
  // pause or during concurrent compaction. The flag is reset in next GC cycle's
  // InitializePhase(). Therefore, it's safe to update without any memory ordering.
  bool IsCompacting() const { return compacting_; }

  bool IsUsingSigbusFeature() const { return use_uffd_sigbus_; }

  // Called by SIGBUS handler. NO_THREAD_SAFETY_ANALYSIS for mutator-lock, which
  // is asserted in the function.
  bool SigbusHandler(siginfo_t* info) REQUIRES(!lock_) NO_THREAD_SAFETY_ANALYSIS;

  GcType GetGcType() const override {
    return kGcTypeFull;
  }

  CollectorType GetCollectorType() const override {
    return kCollectorTypeCMC;
  }

  Barrier& GetBarrier() {
    return gc_barrier_;
  }

  mirror::Object* MarkObject(mirror::Object* obj) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
                         bool do_atomic_update) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void VisitRoots(mirror::Object*** roots,
                  size_t count,
                  const RootInfo& info) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
                  size_t count,
                  const RootInfo& info) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  bool IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
                                   bool do_atomic_update) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void RevokeAllThreadLocalBuffers() override;

  void DelayReferenceReferent(ObjPtr<mirror::Class> klass,
                              ObjPtr<mirror::Reference> reference) override
      REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);

  mirror::Object* IsMarked(mirror::Object* obj) override
      REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);

  mirror::Object* GetFromSpaceAddrFromBarrier(mirror::Object* old_ref) {
    CHECK(compacting_);
    if (HasAddress(old_ref)) {
      return GetFromSpaceAddr(old_ref);
    }
    return old_ref;
  }
  // Called from Heap::PostForkChildAction() for non-zygote processes and from
  // PrepareForCompaction() for zygote processes. Returns true if uffd was
  // created or had already been created earlier.
  bool CreateUserfaultfd(bool post_fork);

  // Returns a pair indicating if userfaultfd itself is available (first) and, if
  // so, whether its minor-fault feature is available or not (second).
  static std::pair<bool, bool> GetUffdAndMinorFault();

  // Add linear-alloc space data when a new space is added to
  // GcVisitedArenaPool, which mostly happens only once.
  void AddLinearAllocSpaceData(uint8_t* begin, size_t len);

  // In copy-mode of userfaultfd, we don't need a 'processed' state, since the
  // processing thread also copies the page, thereby mapping it. The order is
  // important, as we may treat the states as integers. Also, 'kUnprocessed' must
  // be 0, as we rely on madvise(dontneed) returning zeroed pages, which
  // implicitly initializes the page status to 'kUnprocessed'.
  enum class PageState : uint8_t {
    kUnprocessed = 0,           // Not processed yet.
    kProcessing = 1,            // Being processed by GC thread and will not be mapped.
    kProcessed = 2,             // Processed but not mapped.
    kProcessingAndMapping = 3,  // Being processed by GC or mutator and will be mapped.
    kMutatorProcessing = 4,     // Being processed by mutator thread.
    kProcessedAndMapping = 5,   // Processed and will be mapped.
    kProcessedAndMapped = 6     // Processed and mapped. For SIGBUS.
  };

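  // Illustrative summary (an inference from the comments above, not a spec) of
  // how these states are typically traversed:
  //
  //   kUnprocessed -> kProcessing -> kProcessed -> kProcessedAndMapping -> kProcessedAndMapped
  //     (GC thread processes a page whose mapping is deferred, e.g. minor-fault mode)
  //   kUnprocessed -> kMutatorProcessing -> kProcessedAndMapping -> kProcessedAndMapped
  //     (a mutator resolves its own fault when the SIGBUS feature is in use)
  //
  // Because the numeric order is meaningful, callers may compare states as
  // integers, e.g. 'state >= PageState::kProcessed' to check whether a page's
  // contents have been prepared.
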
  // Different heap clamping states.
  enum class ClampInfoStatus : uint8_t {
    kClampInfoNotDone,
    kClampInfoPending,
    kClampInfoFinished
  };

 private:
  using ObjReference = mirror::CompressedReference<mirror::Object>;
  static constexpr uint32_t kPageStateMask = (1 << BitSizeOf<uint8_t>()) - 1;
  // Number of bits (live-words) covered by a single chunk-info (below)
  // entry/word.
  // TODO: Since popcount is performed using SIMD instructions, we should
  // consider using 128-bit in order to halve the chunk-info size.
  static constexpr uint32_t kBitsPerVectorWord = kBitsPerIntPtrT;
  static constexpr uint32_t kOffsetChunkSize = kBitsPerVectorWord * kAlignment;
  static_assert(kOffsetChunkSize < kMinPageSize);
  // Bitmap with bits corresponding to every live word set. For instance, an
  // object which is 4 words in size will have the corresponding 4 bits set. This
  // is required for efficient computation of the new address (post-compaction)
  // from a given old address (pre-compaction).
  template <size_t kAlignment>
  class LiveWordsBitmap : private accounting::MemoryRangeBitmap<kAlignment> {
    using Bitmap = accounting::Bitmap;
    using MemRangeBitmap = accounting::MemoryRangeBitmap<kAlignment>;

   public:
    static_assert(IsPowerOfTwo(kBitsPerVectorWord));
    static_assert(IsPowerOfTwo(Bitmap::kBitsPerBitmapWord));
    static_assert(kBitsPerVectorWord >= Bitmap::kBitsPerBitmapWord);
    static constexpr uint32_t kBitmapWordsPerVectorWord =
        kBitsPerVectorWord / Bitmap::kBitsPerBitmapWord;
    static_assert(IsPowerOfTwo(kBitmapWordsPerVectorWord));
    using MemRangeBitmap::SetBitmapSize;
    static LiveWordsBitmap* Create(uintptr_t begin, uintptr_t end);

    // Return offset (within the indexed chunk-info) of the nth live word.
    uint32_t FindNthLiveWordOffset(size_t chunk_idx, uint32_t n) const;
    // Sets all bits in the bitmap corresponding to the given range. Also
    // returns the bit-index of the first word.
    ALWAYS_INLINE uintptr_t SetLiveWords(uintptr_t begin, size_t size);
    // Count the number of live words up to the given bit-index. This is to be
    // used to compute the post-compact address of an old reference.
    ALWAYS_INLINE size_t CountLiveWordsUpto(size_t bit_idx) const;
    // Call 'visitor' for every stride of contiguous marked bits in the live-words
    // bitmap, starting from begin_bit_idx. Only visit 'bytes' live bytes or
    // until 'end', whichever comes first.
    // Visitor is called with the index of the first marked bit in the stride,
    // the stride size, and whether it's the last stride in the given range or not.
    template <typename Visitor>
    ALWAYS_INLINE void VisitLiveStrides(uintptr_t begin_bit_idx,
                                        uint8_t* end,
                                        const size_t bytes,
                                        Visitor&& visitor) const
        REQUIRES_SHARED(Locks::mutator_lock_);
    // Count the number of live bytes in the given vector entry.
    size_t LiveBytesInBitmapWord(size_t chunk_idx) const;
    void ClearBitmap() { Bitmap::Clear(); }
    ALWAYS_INLINE uintptr_t Begin() const { return MemRangeBitmap::CoverBegin(); }
    ALWAYS_INLINE bool HasAddress(mirror::Object* obj) const {
      return MemRangeBitmap::HasAddress(reinterpret_cast<uintptr_t>(obj));
    }
    ALWAYS_INLINE bool Test(uintptr_t bit_index) const {
      return Bitmap::TestBit(bit_index);
    }
    ALWAYS_INLINE bool Test(mirror::Object* obj) const {
      return MemRangeBitmap::Test(reinterpret_cast<uintptr_t>(obj));
    }
    ALWAYS_INLINE uintptr_t GetWord(size_t index) const {
      static_assert(kBitmapWordsPerVectorWord == 1);
      return Bitmap::Begin()[index * kBitmapWordsPerVectorWord];
    }
  };

  static bool HasAddress(mirror::Object* obj, uint8_t* begin, uint8_t* end) {
    uint8_t* ptr = reinterpret_cast<uint8_t*>(obj);
    return ptr >= begin && ptr < end;
  }

  bool HasAddress(mirror::Object* obj) const {
    return HasAddress(obj, moving_space_begin_, moving_space_end_);
  }
  // For a given object address in the pre-compact space, return the corresponding
  // address in the from-space, where heap pages are relocated in the compaction
  // pause.
  mirror::Object* GetFromSpaceAddr(mirror::Object* obj) const {
    DCHECK(HasAddress(obj)) << " obj=" << obj;
    return reinterpret_cast<mirror::Object*>(reinterpret_cast<uintptr_t>(obj)
                                             + from_space_slide_diff_);
  }

  // Verifies that the given object reference refers to a valid object.
  // Otherwise fatally dumps logs, including those from the callback.
  template <typename Callback>
  void VerifyObject(mirror::Object* ref, Callback& callback) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Check if the obj is within the heap and has a klass which is likely to be a
  // valid mirror::Class.
  bool IsValidObject(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_);
  void InitializePhase();
  void FinishPhase() REQUIRES(!Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !lock_);
  void MarkingPhase() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_);
  void CompactionPhase() REQUIRES_SHARED(Locks::mutator_lock_);

  void SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(!Locks::heap_bitmap_lock_);
  // Update the reference at the given offset in the given object with the
  // post-compact address. [begin, end) is the moving-space range.
  ALWAYS_INLINE void UpdateRef(mirror::Object* obj,
                               MemberOffset offset,
                               uint8_t* begin,
                               uint8_t* end) REQUIRES_SHARED(Locks::mutator_lock_);

  // Verify that the gc-root is updated only once. Returns false if the update
  // shouldn't be done.
  ALWAYS_INLINE bool VerifyRootSingleUpdate(void* root,
                                            mirror::Object* old_ref,
                                            const RootInfo& info)
      REQUIRES_SHARED(Locks::mutator_lock_);
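
  // Worked example for GetFromSpaceAddr() above (hypothetical addresses): if the
  // moving space begins at 0x13000000 and its pages are mremapped during the
  // compaction pause to a from-space beginning at 0x7f000000, then
  // from_space_slide_diff_ = 0x7f000000 - 0x13000000, and an object whose
  // pre-compact address is 0x13000040 is read from its from-space copy at
  // 0x13000040 + from_space_slide_diff_ = 0x7f000040 while compacting.
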
  // Update the given root with post-compact address. [begin, end) is
  // moving-space range.
  ALWAYS_INLINE void UpdateRoot(mirror::CompressedReference<mirror::Object>* root,
                                uint8_t* begin,
                                uint8_t* end,
                                const RootInfo& info = RootInfo(RootType::kRootUnknown))
      REQUIRES_SHARED(Locks::mutator_lock_);
  ALWAYS_INLINE void UpdateRoot(mirror::Object** root,
                                uint8_t* begin,
                                uint8_t* end,
                                const RootInfo& info = RootInfo(RootType::kRootUnknown))
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Given the pre-compact address, the function returns the post-compact
  // address of the given object. [begin, end) is moving-space range.
  ALWAYS_INLINE mirror::Object* PostCompactAddress(mirror::Object* old_ref,
                                                   uint8_t* begin,
                                                   uint8_t* end) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compute post-compact address of an object in moving space. This function
  // assumes that old_ref is in moving space.
  ALWAYS_INLINE mirror::Object* PostCompactAddressUnchecked(mirror::Object* old_ref) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compute the new address for an object which was allocated prior to starting
  // this GC cycle.
  ALWAYS_INLINE mirror::Object* PostCompactOldObjAddr(mirror::Object* old_ref) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compute the new address for an object which was black allocated during this
  // GC cycle.
  ALWAYS_INLINE mirror::Object* PostCompactBlackObjAddr(mirror::Object* old_ref) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Clears (for alloc spaces in the beginning of marking phase) or ages the
  // card table. Also, identifies immune spaces and mark bitmap.
  void PrepareCardTableForMarking(bool clear_alloc_space_cards)
      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);

  // Perform one last round of marking, identifying roots from dirty cards
  // during a stop-the-world (STW) pause.
  void MarkingPause() REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_);
  // Perform stop-the-world pause prior to concurrent compaction.
  // Updates GC-roots and protects heap so that during the concurrent
  // compaction phase we can receive faults and compact the corresponding pages
  // on the fly.
  void CompactionPause() REQUIRES(Locks::mutator_lock_);
  // Compute offsets (in chunk_info_vec_) and other data structures required
  // during concurrent compaction.
  void PrepareForCompaction() REQUIRES_SHARED(Locks::mutator_lock_);

  // Copy gPageSize live bytes starting from 'offset' (within the moving space),
  // which must be within 'obj', into the gPageSize sized memory pointed by 'addr'.
  // Then update the references within the copied objects. The boundary objects are
  // partially updated such that only the references that lie in the page are updated.
  // This is necessary to avoid cascading userfaults.
  void CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* addr, bool needs_memset_zero)
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compact the bump-pointer space. Pass page that should be used as buffer for
  // userfaultfd.
  template <int kMode>
  void CompactMovingSpace(uint8_t* page) REQUIRES_SHARED(Locks::mutator_lock_);

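  // Conceptual sketch (not the actual implementation) of what the
  // PostCompactAddress*() helpers above compute: the post-compact address of a
  // live pre-existing object is essentially
  //
  //   moving_space_begin_ + kAlignment * (number of live words that precede the
  //                                       object in the live-words bitmap)
  //
  // where chunk_info_vec_ (see below), after its exclusive prefix-scan in
  // PrepareForCompaction(), supplies the live-byte total of all preceding
  // chunks so that only the object's own chunk needs a popcount. Black-allocated
  // objects are instead slid by a constant (see black_objs_slide_diff_ below).
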
  // Compact the given page as per func and change its state. Also map/copy the
  // page, if required. Returns true if the page was compacted, else false.
  template <int kMode, typename CompactionFn>
  ALWAYS_INLINE bool DoPageCompactionWithStateChange(size_t page_idx,
                                                     uint8_t* to_space_page,
                                                     uint8_t* page,
                                                     bool map_immediately,
                                                     CompactionFn func)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Update all the objects in the given non-moving space page. 'first' object
  // could have started in some preceding page.
  void UpdateNonMovingPage(mirror::Object* first, uint8_t* page)
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Update all the references in the non-moving space.
  void UpdateNonMovingSpace() REQUIRES_SHARED(Locks::mutator_lock_);

  // For all the pages in non-moving space, find the first object that overlaps
  // with the pages' start address, and store in first_objs_non_moving_space_ array.
  void InitNonMovingSpaceFirstObjects() REQUIRES_SHARED(Locks::mutator_lock_);
  // In addition to the first-objects for every post-compact moving space page,
  // also find offsets within those objects from where the contents should be
  // copied to the page. The offsets are relative to the moving-space's
  // beginning. Store the computed first-object and offset in first_objs_moving_space_
  // and pre_compact_offset_moving_space_ respectively.
  void InitMovingSpaceFirstObjects(const size_t vec_len) REQUIRES_SHARED(Locks::mutator_lock_);

  // Gather the info related to black allocations from bump-pointer space to
  // enable concurrent sliding of these pages.
  void UpdateMovingSpaceBlackAllocations() REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
  // Update first-object info from allocation-stack for non-moving space black
  // allocations.
  void UpdateNonMovingSpaceBlackAllocations() REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);

  // Slides (retaining the empty holes, which are usually part of some in-use TLAB)
  // a black page in the moving space. 'first_obj' is the object that overlaps with
  // the first byte of the page being slid. 'pre_compact_page' is the pre-compact
  // address of the page being slid. 'dest' is the gPageSize sized memory where
  // the contents would be copied.
  void SlideBlackPage(mirror::Object* first_obj,
                      mirror::Object* next_page_first_obj,
                      uint32_t first_chunk_size,
                      uint8_t* const pre_compact_page,
                      uint8_t* dest,
                      bool needs_memset_zero) REQUIRES_SHARED(Locks::mutator_lock_);

  // Perform reference-processing and the likes before sweeping the non-movable
  // spaces.
  void ReclaimPhase() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_);

  // Mark GC-roots (except from immune spaces and thread-stacks) during a STW pause.
  void ReMarkRoots(Runtime* runtime) REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
  // Concurrently mark GC-roots, except from immune spaces.
  void MarkRoots(VisitRootFlags flags) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Collect thread stack roots via a checkpoint.
  void MarkRootsCheckpoint(Thread* self, Runtime* runtime) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Second round of concurrent marking. Mark all gray objects that got dirtied
  // since the first round.
  void PreCleanCards() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);

  void MarkNonThreadRoots(Runtime* runtime) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime)
      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);

  // Traverse through the reachable objects and mark them.
  void MarkReachableObjects() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Scan (only) immune spaces looking for references into the garbage collected
  // spaces.
  void UpdateAndMarkModUnion() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Scan mod-union and card tables, covering all the spaces, to identify dirty objects.
  // These are in 'minimum age' cards, which is 'kCardAged' in case of concurrent (second round)
  // marking and kCardDirty during the STW pause.
  void ScanDirtyObjects(bool paused, uint8_t minimum_age) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Recursively mark dirty objects. Invoked both concurrently as well as in a STW
  // pause in PausePhase().
  void RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Go through all the objects in the mark-stack until it's empty.
  void ProcessMarkStack() override REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void ExpandMarkStack() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  // Scan object for references. If kUpdateLiveWords is true then set bits in
  // the live-words bitmap and add the object's size to chunk-info.
  template <bool kUpdateLiveWords>
  void ScanObject(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Push objects to the mark-stack right after successfully marking objects.
  void PushOnMarkStack(mirror::Object* obj)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

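  // Minimal sketch (an approximation, not the actual code) of what updating
  // liveness info for a newly marked moving-space object involves, in terms of
  // members declared later in this class; objects straddling a chunk boundary
  // need their size split across the affected chunk_info_vec_ entries:
  //
  //   uintptr_t first_bit =
  //       live_words_bitmap_->SetLiveWords(reinterpret_cast<uintptr_t>(obj), obj_size);
  //   chunk_info_vec_[first_bit / kBitsPerVectorWord] += obj_size;
  //   freed_objects_--;  // one more live object discovered
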
  // Update the live-words bitmap as well as add the object size to the
  // chunk-info vector. Both are required for computation of post-compact addresses.
  // Also updates freed_objects_ counter.
  void UpdateLivenessInfo(mirror::Object* obj, size_t obj_size)
      REQUIRES_SHARED(Locks::mutator_lock_);

  void ProcessReferences(Thread* self)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(!Locks::heap_bitmap_lock_);

  void MarkObjectNonNull(mirror::Object* obj,
                         mirror::Object* holder = nullptr,
                         MemberOffset offset = MemberOffset(0))
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void MarkObject(mirror::Object* obj, mirror::Object* holder, MemberOffset offset)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  template <bool kParallel>
  bool MarkObjectNonNullNoPush(mirror::Object* obj,
                               mirror::Object* holder = nullptr,
                               MemberOffset offset = MemberOffset(0))
      REQUIRES(Locks::heap_bitmap_lock_)
      REQUIRES_SHARED(Locks::mutator_lock_);

  void Sweep(bool swap_bitmaps) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void SweepLargeObjects(bool swap_bitmaps) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  // Perform all kernel operations required for concurrent compaction. Includes
  // mremap to move pre-compact pages to from-space, followed by userfaultfd
  // registration on the moving space and linear-alloc.
  void KernelPreparation();
  // Called by KernelPreparation() for every memory range being prepared for
  // userfaultfd registration.
  void KernelPrepareRangeForUffd(uint8_t* to_addr,
                                 uint8_t* from_addr,
                                 size_t map_size,
                                 int fd,
                                 uint8_t* shadow_addr = nullptr);

  void RegisterUffd(void* addr, size_t size, int mode);
  void UnregisterUffd(uint8_t* start, size_t len);

  // Called by thread-pool workers to read uffd_ and process fault events.
  template <int kMode>
  void ConcurrentCompaction(uint8_t* buf) REQUIRES_SHARED(Locks::mutator_lock_);
  // Called by thread-pool workers to compact and copy/map the fault page in
  // moving space.
  template <int kMode>
  void ConcurrentlyProcessMovingPage(uint8_t* fault_page,
                                     uint8_t* buf,
                                     size_t nr_moving_space_used_pages)
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Called by thread-pool workers to process and copy/map the fault page in
  // linear-alloc.
  template <int kMode>
  void ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Process concurrently all the pages in linear-alloc. Called by gc-thread.
  void ProcessLinearAlloc() REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns true if the moving space can be compacted using uffd's minor-fault
  // feature.
  bool CanCompactMovingSpaceWithMinorFault();

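  // For reference, a minimal sketch of what userfaultfd registration looks like
  // at the kernel-API level (plain <linux/userfaultfd.h> usage, not ART's exact
  // code; RegisterUffd() above wraps the equivalent ioctl):
  //
  //   struct uffdio_register reg = {};
  //   reg.range.start = reinterpret_cast<uintptr_t>(addr);
  //   reg.range.len = size;
  //   reg.mode = UFFDIO_REGISTER_MODE_MISSING;  // or UFFDIO_REGISTER_MODE_MINOR
  //   ioctl(uffd_, UFFDIO_REGISTER, &reg);
  //
  // Faults on a registered range are then delivered either as events read from
  // uffd_ (thread-pool workers, see ConcurrentCompaction()) or as SIGBUS when
  // use_uffd_sigbus_ is set (see SigbusHandler()).
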
  // Does the following:
  // 1. Checks the status of to-space pages in the [cur_page_idx,
  //    last_checked_reclaim_page_idx_) range to see whether the corresponding
  //    from-space pages can be reused.
  // 2. Taking into consideration classes which are allocated after their
  //    objects (in address order), computes the page (in from-space) from which
  //    actual reclamation can be done.
  // 3. Maps the pages in the [cur_page_idx, end_idx_for_mapping) range.
  // 4. Madvises the pages in the [page from (2), last_reclaimed_page_) range.
  bool FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_idx_for_mapping)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Maps processed pages (from moving space and linear-alloc) for uffd's
  // minor-fault feature. We try to 'claim' all processed (and unmapped) pages
  // contiguous to 'to_space_start'.
  // kFirstPageMapping indicates if the first page is already claimed or not. It
  // also indicates that the ioctl must succeed in mapping the first page.
  template <bool kFirstPageMapping>
  void MapProcessedPages(uint8_t* to_space_start,
                         Atomic<PageState>* state_arr,
                         size_t arr_idx,
                         size_t arr_len) REQUIRES_SHARED(Locks::mutator_lock_);

  // Maps moving space pages in the [start_idx, arr_len) range. It fetches the page
  // address containing the compacted content from the moving_pages_status_ array.
  // 'from_fault' is true when called from userfault (sigbus handler).
  // 'return_on_contention' is set to true by the gc-thread while it is compacting
  // pages. In the end it calls the function with `return_on_contention=false`
  // to ensure all pages are mapped. Returns the number of pages that are mapped.
  size_t MapMovingSpacePages(size_t start_idx,
                             size_t arr_len,
                             bool from_fault,
                             bool return_on_contention) REQUIRES_SHARED(Locks::mutator_lock_);

  bool IsValidFd(int fd) const { return fd >= 0; }

  PageState GetPageStateFromWord(uint32_t page_word) {
    return static_cast<PageState>(static_cast<uint8_t>(page_word));
  }

  PageState GetMovingPageState(size_t idx) {
    return GetPageStateFromWord(moving_pages_status_[idx].load(std::memory_order_acquire));
  }

  // Add/update the <class, obj> pair if class > obj and obj is the lowest-address
  // object of the class.
  ALWAYS_INLINE void UpdateClassAfterObjectMap(mirror::Object* obj)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Updates the 'class_after_obj_map_' map by updating the keys (classes) with
  // their highest-address super-class (obtained from 'super_class_after_class_map_'),
  // if there is any. This is to ensure we don't free from-space pages before
  // the lowest-address obj is compacted.
  void UpdateClassAfterObjMap();

  void MarkZygoteLargeObjects() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  // Map zero-pages in the given range. 'tolerate_eexist' and 'tolerate_enoent'
  // help us decide if we should expect EEXIST or ENOENT back from the ioctl
  // respectively. It may return after mapping fewer pages than requested: if
  // the mmap_lock is found to be contended, we delay the operation based on the
  // thread's priority before retrying.
  // Returns the number of bytes (a multiple of page-size) now known to be mapped.
  size_t ZeropageIoctl(void* addr, size_t length, bool tolerate_eexist, bool tolerate_enoent);
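
  // For reference, the kernel-level shape of the operation that CopyIoctl()
  // below wraps (a sketch of plain <linux/userfaultfd.h> usage, not ART's exact
  // code):
  //
  //   struct uffdio_copy copy = {};
  //   copy.dst = reinterpret_cast<uintptr_t>(dst);     // page-aligned destination
  //   copy.src = reinterpret_cast<uintptr_t>(buffer);  // staged/compacted contents
  //   copy.len = length;
  //   ioctl(uffd_, UFFDIO_COPY, &copy);
  //   // On success copy.copy holds the number of bytes mapped; EEXIST means some
  //   // page in the range was already mapped, e.g. by a racing thread.
  //
  // ZeropageIoctl() above is analogous, using UFFDIO_ZEROPAGE to map zero-pages
  // without a source buffer.
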
  // Map 'buffer' to 'dst', both being 'length' bytes, using at most one ioctl
  // call. 'return_on_contention' indicates that the function should return
  // as soon as mmap_lock contention is detected. Like ZeropageIoctl(), this
  // function also uses the thread's priority to decide how long to delay before
  // forcing the ioctl operation. The function also returns if the ioctl returns
  // EEXIST. Returns the number of bytes (a multiple of page-size) mapped.
  size_t CopyIoctl(void* dst, void* buffer, size_t length, bool return_on_contention);

  // Called after updating linear-alloc page(s) to map the page(s). It first
  // updates the state of the pages to kProcessedAndMapping and, after the ioctl,
  // to kProcessedAndMapped. Returns true if at least the first page is now mapped.
  // If 'free_pages' is true then it also frees shadow pages. If 'single_ioctl'
  // is true, then it stops after the first ioctl.
  bool MapUpdatedLinearAllocPages(uint8_t* start_page,
                                  uint8_t* start_shadow_page,
                                  Atomic<PageState>* state,
                                  size_t length,
                                  bool free_pages,
                                  bool single_ioctl);
  // Called for clamping of 'info_map_' and other GC data structures, which are
  // small and/or in >4GB address space. There is no real benefit of clamping
  // them synchronously during app forking. It clamps only if clamp_info_map_status_
  // is set to kClampInfoPending, which is done by ClampGrowthLimit().
  void MaybeClampGcStructures() REQUIRES(Locks::heap_bitmap_lock_);

  size_t ComputeInfoMapSize();
  // Initialize all the info-map related fields of this GC. Returns the total size
  // of all the structures in the info-map.
  size_t InitializeInfoMap(uint8_t* p, size_t moving_space_sz);
  // Update class-table classes in the compaction pause if we are running in
  // debuggable mode. Only visit the class-table in image spaces if
  // 'immune_class_table_only' is true.
  void UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // For checkpoints
  Barrier gc_barrier_;
  // Every object inside the immune spaces is assumed to be marked.
  ImmuneSpaces immune_spaces_;
  // Required only when mark-stack is accessed in shared mode, which happens
  // when collecting thread-stack roots using checkpoint. Otherwise, we use it
  // to synchronize on updated_roots_ in debug-builds.
  Mutex lock_;
  accounting::ObjectStack* mark_stack_;
  // Special bitmap wherein all the bits corresponding to an object are set.
  // TODO: make LiveWordsBitmap encapsulated in this class rather than a
  // pointer. We tend to access its members in performance-sensitive
  // code-path. Also, use a single MemMap for all the GC's data structures,
  // which we will clear in the end. This would help in limiting the number of
  // VMAs that get created in the kernel.
  std::unique_ptr<LiveWordsBitmap<kAlignment>> live_words_bitmap_;
  // Track GC-roots updated so far in a GC-cycle. This is to confirm that no
  // GC-root is updated twice.
  // TODO: Must be replaced with an efficient mechanism eventually. Or ensure
  // that double updates don't happen in the first place.
  std::unique_ptr<std::unordered_set<void*>> updated_roots_ GUARDED_BY(lock_);
  MemMap from_space_map_;
  MemMap shadow_to_space_map_;
  // An array of live-bytes in logical chunks of kOffsetChunkSize size
  // in the 'to-be-compacted' space.
  MemMap info_map_;
  // Set of page-sized buffers used for compaction. The first page is used by
  // the GC thread. Subsequent pages are used by mutator threads in case of the
  // SIGBUS feature, and by uffd-worker threads otherwise. In the latter case
  // the first page is also used for termination of concurrent compaction by
  // making worker threads terminate the userfaultfd read loop.
  MemMap compaction_buffers_map_;

  class LessByArenaAddr {
   public:
    bool operator()(const TrackedArena* a, const TrackedArena* b) const {
      return std::less<uint8_t*>{}(a->Begin(), b->Begin());
    }
  };

  // Map of arenas allocated in LinearAlloc arena-pool and last non-zero page,
  // captured during compaction pause for concurrent updates.
  std::map<const TrackedArena*, uint8_t*, LessByArenaAddr> linear_alloc_arenas_;
  // Set of PageStatus arrays, one per arena-pool space. It's extremely rare to
  // have more than one, but this is to be ready for the worst case.
  class LinearAllocSpaceData {
   public:
    LinearAllocSpaceData(MemMap&& shadow,
                         MemMap&& page_status_map,
                         uint8_t* begin,
                         uint8_t* end,
                         bool already_shared)
        : shadow_(std::move(shadow)),
          page_status_map_(std::move(page_status_map)),
          begin_(begin),
          end_(end),
          already_shared_(already_shared) {}

    MemMap shadow_;
    MemMap page_status_map_;
    uint8_t* begin_;
    uint8_t* end_;
    // Indicates if the linear-alloc is already MAP_SHARED.
    bool already_shared_;
  };

  std::vector<LinearAllocSpaceData> linear_alloc_spaces_data_;

  class ObjReferenceHash {
   public:
    uint32_t operator()(const ObjReference& ref) const {
      return ref.AsVRegValue() >> kObjectAlignmentShift;
    }
  };

  class ObjReferenceEqualFn {
   public:
    bool operator()(const ObjReference& a, const ObjReference& b) const {
      return a.AsMirrorPtr() == b.AsMirrorPtr();
    }
  };

  class LessByObjReference {
   public:
    bool operator()(const ObjReference& a, const ObjReference& b) const {
      return std::less<mirror::Object*>{}(a.AsMirrorPtr(), b.AsMirrorPtr());
    }
  };

  // Data structures used to track objects whose layout information is stored in later
  // allocated classes (at higher addresses). We must be careful not to free the
  // corresponding from-space pages prematurely.
  using ObjObjOrderedMap = std::map<ObjReference, ObjReference, LessByObjReference>;
  using ObjObjUnorderedMap =
      std::unordered_map<ObjReference, ObjReference, ObjReferenceHash, ObjReferenceEqualFn>;
  // Unordered map of <K, S> such that the class K (in moving space) has kClassWalkSuper
  // in reference bitmap and S is its highest address super class.
  ObjObjUnorderedMap super_class_after_class_hash_map_;
  // Unordered map of <K, V> such that the class K (in moving space) is after its objects
  // or would require iterating super-class hierarchy when visiting references. And V is
  // its lowest address object (in moving space).
  ObjObjUnorderedMap class_after_obj_hash_map_;
  // Ordered map constructed before starting compaction using the above two maps. Key is a
  // class (or super-class) which is higher in address order than some of its object(s) and
  // value is the corresponding object with lowest address.
  ObjObjOrderedMap class_after_obj_ordered_map_;
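
  // Illustrative scenario for the maps above (hypothetical page numbers):
  // suppose class K lives on from-space page 9 while its lowest-address
  // instance O lives on page 3. Compacting O requires reading K's layout from
  // K's from-space copy, so page 9 must not be madvised until O has been
  // compacted. class_after_obj_ordered_map_ would then hold the pair <K, O>,
  // and the reverse iterator below tracks how far this constraint has been
  // resolved as compaction proceeds.
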
  // Since the compaction is done in reverse, we use a reverse iterator. It is maintained
  // either at the pair whose class is lower than the first page to be freed, or at the
  // pair whose object is not yet compacted.
  ObjObjOrderedMap::const_reverse_iterator class_after_obj_iter_;
  // Cached reference to the last class which has kClassWalkSuper in its reference
  // bitmap but has all its super classes lower in address order than itself.
  mirror::Class* walk_super_class_cache_;
  // Used by FreeFromSpacePages() for maintaining markers in the moving space for
  // how far the pages have been reclaimed (madvised) and checked.
  //
  // Pages from this index to the end of to-space have been checked (via page_status)
  // and their corresponding from-space pages are reclaimable.
  size_t last_checked_reclaim_page_idx_;
  // All from-space pages in [last_reclaimed_page_, from_space->End()) are
  // reclaimed (madvised). Pages in [from-space page corresponding to
  // last_checked_reclaim_page_idx_, last_reclaimed_page_) are not reclaimed, as
  // they may contain classes required for class hierarchy traversal for
  // visiting references during compaction.
  uint8_t* last_reclaimed_page_;
  // All the pages in [last_reclaimable_page_, last_reclaimed_page_) in
  // from-space are available to store compacted contents for batching until the
  // next time madvise is called.
  uint8_t* last_reclaimable_page_;
  // [cur_reclaimable_page_, last_reclaimed_page_) have been used to store
  // compacted contents for batching.
  uint8_t* cur_reclaimable_page_;

  space::ContinuousSpace* non_moving_space_;
  space::BumpPointerSpace* const bump_pointer_space_;
  // The main space bitmap
  accounting::ContinuousSpaceBitmap* const moving_space_bitmap_;
  accounting::ContinuousSpaceBitmap* non_moving_space_bitmap_;
  Thread* thread_running_gc_;
  // Array of moving-space's pages' compaction status, which is stored in the
  // least-significant byte. kProcessed entries also contain the from-space
  // offset of the page which contains the compacted contents of the ith
  // to-space page.
  Atomic<uint32_t>* moving_pages_status_;
  size_t vector_length_;
  size_t live_stack_freeze_size_;

  uint64_t bytes_scanned_;

  // For every page in the to-space (post-compact heap) we need to know the
  // first object from which we must compact and/or update references. This is
  // for both non-moving and moving space. Additionally, for the moving-space,
  // we also need the offset within the object from where we need to start
  // copying.
  // chunk_info_vec_ holds live bytes for chunks during the marking phase. After
  // marking we perform an exclusive scan to compute the offset for every chunk.
  uint32_t* chunk_info_vec_;
  // For pages before black allocations, pre_compact_offset_moving_space_[i]
  // holds the offset within the space from where the objects need to be copied
  // into the ith post-compact page.
  // Otherwise, black_alloc_pages_first_chunk_size_[i] holds the size of the first
  // non-empty chunk in the ith black-allocations page.
  union {
    uint32_t* pre_compact_offset_moving_space_;
    uint32_t* black_alloc_pages_first_chunk_size_;
  };
  // first_objs_moving_space_[i] is the pre-compact address of the object which
  // would overlap with the starting boundary of the ith post-compact page.
  ObjReference* first_objs_moving_space_;
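
  // Illustrative decoding of a moving_pages_status_ word declared above (a
  // sketch; the authoritative helpers are kPageStateMask and
  // GetPageStateFromWord() / GetMovingPageState()):
  //
  //   uint32_t word = moving_pages_status_[i].load(std::memory_order_acquire);
  //   PageState state = GetPageStateFromWord(word);  // least-significant byte
  //   // For kProcessed entries, the bits above the status byte record where in
  //   // from-space the compacted contents of the ith to-space page were staged.
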
  // First object for every page. It could be greater than the page's start
  // address, or null if the page is empty.
  ObjReference* first_objs_non_moving_space_;
  size_t non_moving_first_objs_count_;
  // Length of first_objs_moving_space_ and pre_compact_offset_moving_space_
  // arrays. Also the number of pages which are to be compacted.
  size_t moving_first_objs_count_;
  // Number of pages containing black-allocated objects, indicating the number of
  // pages to be slid.
  size_t black_page_count_;

  uint8_t* from_space_begin_;
  // Cached values of the moving-space range to optimize checking whether a
  // reference belongs to the moving-space or not. May get updated if and when
  // the heap is clamped.
  uint8_t* const moving_space_begin_;
  uint8_t* moving_space_end_;
  // moving-space's end pointer at the marking pause. All allocations beyond
  // this will be considered black in the current GC cycle. Aligned up to page
  // size.
  uint8_t* black_allocations_begin_;
  // End of compacted space. Used for computing the post-compact address of black
  // allocated objects. Aligned up to page size.
  uint8_t* post_compact_end_;
  // Cache (black_allocations_begin_ - post_compact_end_) for post-compact
  // address computations.
  ptrdiff_t black_objs_slide_diff_;
  // Cache (from_space_begin_ - bump_pointer_space_->Begin()) so that we can
  // compute the from-space address of a given pre-compact addr efficiently.
  ptrdiff_t from_space_slide_diff_;

  // TODO: Remove once an efficient mechanism to deal with double root updates
  // is incorporated.
  void* stack_high_addr_;
  void* stack_low_addr_;

  uint8_t* conc_compaction_termination_page_;

  PointerSize pointer_size_;
  // Number of objects freed during this GC in the moving space. It is decremented
  // every time an object is discovered, and the total-object count is added to it
  // in MarkingPause(). It reaches the correct count only once the marking phase
  // is completed.
  int32_t freed_objects_;
  // memfds for the moving space for using userfaultfd's minor-fault feature.
  // Initialized to kFdUnused to indicate that mmap should be MAP_PRIVATE in
  // KernelPrepareRangeForUffd().
  int moving_to_space_fd_;
  int moving_from_space_fd_;
  // Userfault file descriptor, accessed only by the GC itself.
  // kFallbackMode value indicates that we are in the fallback mode.
  int uffd_;
  // Number of mutator-threads currently executing the SIGBUS handler. When the
  // GC-thread is done with compaction, it sets the most significant bit to
  // indicate that. Mutator threads check for the flag when incrementing in the
  // handler.
  std::atomic<SigbusCounterType> sigbus_in_progress_count_;
  // Number of mutator-threads/uffd-workers working on a moving-space page. It
  // must be 0 before the gc-thread can unregister the space after it's done
  // sequentially compacting all pages of the space.
  std::atomic<uint16_t> compaction_in_progress_count_;
  // When using the SIGBUS feature, this counter is used by mutators to claim a page
  // out of compaction buffers to be used for the entire compaction cycle.
  std::atomic<uint16_t> compaction_buffer_counter_;
  // Used to exit from the compaction loop at the end of concurrent compaction.
  uint8_t thread_pool_counter_;
  // True while compacting.
  bool compacting_;
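
  // Minimal sketch (not the actual code) of the protocol implied by the
  // sigbus_in_progress_count_ comment above: a mutator entering SigbusHandler()
  // does roughly
  //
  //   SigbusCounterType count =
  //       sigbus_in_progress_count_.fetch_add(1, std::memory_order_acquire);
  //   if ((count & kSigbusCounterCompactionDoneMask) != 0) {
  //     sigbus_in_progress_count_.fetch_sub(1, std::memory_order_release);
  //     return false;  // compaction already finished; nothing left to handle
  //   }
  //   ... resolve the faulting page, then decrement the counter ...
  //
  // while the GC thread, once compaction is done, sets the top bit and can wait
  // for the count of in-flight handlers to drain before tearing down state.
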
  // Flag indicating whether one-time uffd initialization has been done. It will
  // be false on the first GC for non-zygote processes, and always for zygote.
  // Its purpose is to keep the userfaultfd overhead in Heap::PostForkChildAction()
  // to a minimum, as it's invoked in the app startup path. With this, we register
  // the compaction-termination page on the first GC.
  bool uffd_initialized_;
  // Flag indicating if userfaultfd supports minor-faults. Set appropriately in
  // CreateUserfaultfd(), where we get this information from the kernel.
  const bool uffd_minor_fault_supported_;
  // Flag indicating if we should use sigbus signals instead of threads to
  // handle userfaults.
  const bool use_uffd_sigbus_;
  // For non-zygote processes this flag indicates if the spaces are ready to
  // start using userfaultfd's minor-fault feature. This initialization involves
  // starting to use shmem (memfd_create) for the userfaultfd protected spaces.
  bool minor_fault_initialized_;
  // Set to true when linear-alloc can start mapping with MAP_SHARED. Set on
  // non-zygote processes during the first GC, which sets up everything for using
  // minor-fault from the next GC.
  bool map_linear_alloc_shared_;
  // Clamping status of `info_map_`. Initialized to 'NotDone'. Once the heap is
  // clamped but clamping of info_map_ is deferred, we set it to 'Pending'. Once
  // 'info_map_' is also clamped, we set it to 'Finished'.
  ClampInfoStatus clamp_info_map_status_;

  class FlipCallback;
  class ThreadFlipVisitor;
  class VerifyRootMarkedVisitor;
  class ScanObjectVisitor;
  class CheckpointMarkThreadRoots;
  template <size_t kBufferSize>
  class ThreadRootsVisitor;
  class RefFieldsVisitor;
  template <bool kCheckBegin, bool kCheckEnd> class RefsUpdateVisitor;
  class ArenaPoolPageUpdater;
  class ClassLoaderRootsUpdater;
  class LinearAllocPageUpdater;
  class ImmuneSpaceUpdateObjVisitor;
  class ConcurrentCompactionGcTask;

  DISALLOW_IMPLICIT_CONSTRUCTORS(MarkCompact);
};

std::ostream& operator<<(std::ostream& os, MarkCompact::PageState value);
std::ostream& operator<<(std::ostream& os, MarkCompact::ClampInfoStatus value);

}  // namespace collector
}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_