/*
 * Copyright 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
#define ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_

#include <signal.h>

#include <map>
#include <memory>
#include <unordered_map>
#include <unordered_set>

#include "barrier.h"
#include "base/atomic.h"
#include "base/gc_visited_arena_pool.h"
#include "base/macros.h"
#include "base/mutex.h"
#include "garbage_collector.h"
#include "gc/accounting/atomic_stack.h"
#include "gc/accounting/bitmap-inl.h"
#include "gc/accounting/heap_bitmap.h"
#include "gc_root.h"
#include "immune_spaces.h"
#include "offsets.h"

namespace art HIDDEN {

EXPORT bool KernelSupportsUffd();

namespace mirror {
class DexCache;
}  // namespace mirror

namespace gc {

class Heap;

namespace space {
class BumpPointerSpace;
}  // namespace space

namespace collector {
class MarkCompact final : public GarbageCollector {
 public:
  using SigbusCounterType = uint32_t;

  static constexpr size_t kAlignment = kObjectAlignment;
  static constexpr int kCopyMode = -1;
  static constexpr int kMinorFaultMode = -2;
  // Fake file descriptor for fallback mode (used when uffd isn't available).
  static constexpr int kFallbackMode = -3;
  static constexpr int kFdSharedAnon = -1;
  static constexpr int kFdUnused = -2;

  // Bitmask for the compaction-done bit in sigbus_in_progress_count_.
  static constexpr SigbusCounterType kSigbusCounterCompactionDoneMask =
      1u << (BitSizeOf<SigbusCounterType>() - 1);
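
  // Illustrative use (a sketch based on the comments around
  // sigbus_in_progress_count_ below, not the exact implementation):
  //   // GC thread, once compaction is done:
  //   sigbus_in_progress_count_.fetch_or(kSigbusCounterCompactionDoneMask);
  //   // Mutator entering the SIGBUS handler:
  //   if (sigbus_in_progress_count_.fetch_add(1u) & kSigbusCounterCompactionDoneMask) {
  //     // Compaction already finished; nothing left to handle here.
  //   }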

  explicit MarkCompact(Heap* heap);

  ~MarkCompact() {}

  void RunPhases() override REQUIRES(!Locks::mutator_lock_, !lock_);

  void ClampGrowthLimit(size_t new_capacity) REQUIRES(Locks::heap_bitmap_lock_);
  // Updated before (or in) pre-compaction pause and is accessed only in the
  // pause or during concurrent compaction. The flag is reset in next GC cycle's
  // InitializePhase(). Therefore, it's safe to update without any memory ordering.
  bool IsCompacting() const { return compacting_; }

  bool IsUsingSigbusFeature() const { return use_uffd_sigbus_; }

  // Called by SIGBUS handler. NO_THREAD_SAFETY_ANALYSIS for mutator-lock, which
  // is asserted in the function.
  bool SigbusHandler(siginfo_t* info) REQUIRES(!lock_) NO_THREAD_SAFETY_ANALYSIS;

  GcType GetGcType() const override {
    return kGcTypeFull;
  }

  CollectorType GetCollectorType() const override {
    return kCollectorTypeCMC;
  }

  Barrier& GetBarrier() {
    return gc_barrier_;
  }

  mirror::Object* MarkObject(mirror::Object* obj) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
                         bool do_atomic_update) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void VisitRoots(mirror::Object*** roots,
                  size_t count,
                  const RootInfo& info) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
                  size_t count,
                  const RootInfo& info) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  bool IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
                                   bool do_atomic_update) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void RevokeAllThreadLocalBuffers() override;

  void DelayReferenceReferent(ObjPtr<mirror::Class> klass,
                              ObjPtr<mirror::Reference> reference) override
      REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);

  mirror::Object* IsMarked(mirror::Object* obj) override
      REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);

  mirror::Object* GetFromSpaceAddrFromBarrier(mirror::Object* old_ref) {
    CHECK(compacting_);
    if (HasAddress(old_ref)) {
      return GetFromSpaceAddr(old_ref);
    }
    return old_ref;
  }
  // Called from Heap::PostForkChildAction() for non-zygote processes and from
  // PrepareForCompaction() for zygote processes. Returns true if uffd was
  // created or had already been created.
  bool CreateUserfaultfd(bool post_fork);

  // Returns a pair indicating if userfaultfd itself is available (first) and if
  // so then whether its minor-fault feature is available or not (second).
  static std::pair<bool, bool> GetUffdAndMinorFault();
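
  // For reference, a minimal sketch of how a userfaultfd descriptor is created
  // and its features negotiated (illustrative; the exact flags and feature bits
  // requested by the implementation live in the .cc file):
  //   int fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
  //   struct uffdio_api api = {.api = UFFD_API, .features = 0};
  //   ioctl(fd, UFFDIO_API, &api);  // kernel reports supported features in api.features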

  // Add linear-alloc space data when a new space is added to
  // GcVisitedArenaPool, which mostly happens only once.
  void AddLinearAllocSpaceData(uint8_t* begin, size_t len);

  // In copy-mode of userfaultfd, we don't need to reach a 'processed' state as
  // it's given that the processing thread also copies the page, thereby mapping it.
  // The order is important as we may treat them as integers. Also,
  // 'kUnprocessed' must be 0 as we rely on madvise(dontneed) to return zeroed
  // pages, which implicitly initializes the page status to 'kUnprocessed'.
  enum class PageState : uint8_t {
    kUnprocessed = 0,           // Not processed yet.
    kProcessing = 1,            // Being processed by GC thread and will not be mapped
    kProcessed = 2,             // Processed but not mapped
    kProcessingAndMapping = 3,  // Being processed by GC or mutator and will be mapped
    kMutatorProcessing = 4,     // Being processed by mutator thread
    kProcessedAndMapping = 5,   // Processed and will be mapped
    kProcessedAndMapped = 6     // Processed and mapped. For SIGBUS.
  };
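
  // Illustrative compile-time check of the invariant noted above: status bytes
  // read from madvise(MADV_DONTNEED)-zeroed memory must decode to 'kUnprocessed'.
  static_assert(static_cast<uint8_t>(PageState::kUnprocessed) == 0);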

  // Different heap clamping states.
  enum class ClampInfoStatus : uint8_t {
    kClampInfoNotDone,
    kClampInfoPending,
    kClampInfoFinished
  };

 private:
  using ObjReference = mirror::CompressedReference<mirror::Object>;
  static constexpr uint32_t kPageStateMask = (1 << BitSizeOf<uint8_t>()) - 1;
  // Number of bits (live-words) covered by a single chunk-info (below)
  // entry/word.
  // TODO: Since popcount is performed using SIMD instructions, we should
  // consider using 128-bit in order to halve the chunk-info size.
  static constexpr uint32_t kBitsPerVectorWord = kBitsPerIntPtrT;
  static constexpr uint32_t kOffsetChunkSize = kBitsPerVectorWord * kAlignment;
  static_assert(kOffsetChunkSize < kMinPageSize);
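  // For example (illustrative): on a 64-bit build with 8-byte object alignment,
  // a single chunk-info entry covers 64 * 8 = 512 bytes of heap, well below the
  // minimum page size.
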
  // Bitmap with a bit set for every live word. For example, an object which is
  // 4 words in size will have the corresponding 4 bits set. This is required
  // for efficient computation of the new address (post-compaction) from a
  // given old address (pre-compaction).
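  // Illustrative example (assuming 8-byte words): a 32-byte live object whose
  // first word is covered by bit 8 sets bits [8, 12); the number of live words
  // marked below bit 8 (see CountLiveWordsUpto()) is what the post-compact
  // address computation is based on.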
  template <size_t kAlignment>
  class LiveWordsBitmap : private accounting::MemoryRangeBitmap<kAlignment> {
    using Bitmap = accounting::Bitmap;
    using MemRangeBitmap = accounting::MemoryRangeBitmap<kAlignment>;

   public:
    static_assert(IsPowerOfTwo(kBitsPerVectorWord));
    static_assert(IsPowerOfTwo(Bitmap::kBitsPerBitmapWord));
    static_assert(kBitsPerVectorWord >= Bitmap::kBitsPerBitmapWord);
    static constexpr uint32_t kBitmapWordsPerVectorWord =
            kBitsPerVectorWord / Bitmap::kBitsPerBitmapWord;
    static_assert(IsPowerOfTwo(kBitmapWordsPerVectorWord));
    using MemRangeBitmap::SetBitmapSize;
    static LiveWordsBitmap* Create(uintptr_t begin, uintptr_t end);

    // Return offset (within the indexed chunk-info) of the nth live word.
    uint32_t FindNthLiveWordOffset(size_t chunk_idx, uint32_t n) const;
    // Sets all bits in the bitmap corresponding to the given range. Also
    // returns the bit-index of the first word.
    ALWAYS_INLINE uintptr_t SetLiveWords(uintptr_t begin, size_t size);
    // Count the number of live words up to the given bit-index. This is to be
    // used to compute the post-compact address of an old reference.
    ALWAYS_INLINE size_t CountLiveWordsUpto(size_t bit_idx) const;
    // Call 'visitor' for every stride of contiguous marked bits in the live-words
    // bitmap, starting from begin_bit_idx. Only visit 'bytes' live bytes or
    // until 'end', whichever comes first.
    // Visitor is called with index of the first marked bit in the stride,
    // stride size and whether it's the last stride in the given range or not.
    template <typename Visitor>
    ALWAYS_INLINE void VisitLiveStrides(uintptr_t begin_bit_idx,
                                        uint8_t* end,
                                        const size_t bytes,
                                        Visitor&& visitor) const
        REQUIRES_SHARED(Locks::mutator_lock_);
    // Count the number of live bytes in the given vector entry.
    size_t LiveBytesInBitmapWord(size_t chunk_idx) const;
    void ClearBitmap() { Bitmap::Clear(); }
    ALWAYS_INLINE uintptr_t Begin() const { return MemRangeBitmap::CoverBegin(); }
    ALWAYS_INLINE bool HasAddress(mirror::Object* obj) const {
      return MemRangeBitmap::HasAddress(reinterpret_cast<uintptr_t>(obj));
    }
    ALWAYS_INLINE bool Test(uintptr_t bit_index) const {
      return Bitmap::TestBit(bit_index);
    }
    ALWAYS_INLINE bool Test(mirror::Object* obj) const {
      return MemRangeBitmap::Test(reinterpret_cast<uintptr_t>(obj));
    }
    ALWAYS_INLINE uintptr_t GetWord(size_t index) const {
      static_assert(kBitmapWordsPerVectorWord == 1);
      return Bitmap::Begin()[index * kBitmapWordsPerVectorWord];
    }
  };

  static bool HasAddress(mirror::Object* obj, uint8_t* begin, uint8_t* end) {
    uint8_t* ptr = reinterpret_cast<uint8_t*>(obj);
    return ptr >= begin && ptr < end;
  }

  bool HasAddress(mirror::Object* obj) const {
    return HasAddress(obj, moving_space_begin_, moving_space_end_);
  }
  // For a given object address in pre-compact space, return the corresponding
  // address in the from-space, where heap pages are relocated in the compaction
  // pause.
  mirror::Object* GetFromSpaceAddr(mirror::Object* obj) const {
    DCHECK(HasAddress(obj)) << " obj=" << obj;
    return reinterpret_cast<mirror::Object*>(reinterpret_cast<uintptr_t>(obj)
                                             + from_space_slide_diff_);
  }
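  // Illustrative example (made-up addresses): with the moving space beginning
  // at 0x70000000 and its from-space mirror at 0x60000000,
  // from_space_slide_diff_ is -0x10000000, so the from-space copy of a
  // pre-compact object at 0x70001234 is read from 0x60001234.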

  // Verifies that the given object reference refers to a valid object.
  // Otherwise fatally dumps logs, including those from callback.
  template <typename Callback>
  void VerifyObject(mirror::Object* ref, Callback& callback) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Check if the obj is within the heap and has a klass which is likely to be
  // a valid mirror::Class.
  bool IsValidObject(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_);
  void InitializePhase();
  void FinishPhase() REQUIRES(!Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !lock_);
  void MarkingPhase() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_);
  void CompactionPhase() REQUIRES_SHARED(Locks::mutator_lock_);

  void SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(!Locks::heap_bitmap_lock_);
  // Update the reference at given offset in the given object with post-compact
  // address. [begin, end) is moving-space range.
  ALWAYS_INLINE void UpdateRef(mirror::Object* obj,
                               MemberOffset offset,
                               uint8_t* begin,
                               uint8_t* end) REQUIRES_SHARED(Locks::mutator_lock_);

  // Verify that the gc-root is updated only once. Returns false if the update
  // shouldn't be done.
  ALWAYS_INLINE bool VerifyRootSingleUpdate(void* root,
                                            mirror::Object* old_ref,
                                            const RootInfo& info)
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Update the given root with post-compact address. [begin, end) is
  // moving-space range.
  ALWAYS_INLINE void UpdateRoot(mirror::CompressedReference<mirror::Object>* root,
                                uint8_t* begin,
                                uint8_t* end,
                                const RootInfo& info = RootInfo(RootType::kRootUnknown))
      REQUIRES_SHARED(Locks::mutator_lock_);
  ALWAYS_INLINE void UpdateRoot(mirror::Object** root,
                                uint8_t* begin,
                                uint8_t* end,
                                const RootInfo& info = RootInfo(RootType::kRootUnknown))
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Given the pre-compact address, the function returns the post-compact
  // address of the given object. [begin, end) is moving-space range.
  ALWAYS_INLINE mirror::Object* PostCompactAddress(mirror::Object* old_ref,
                                                   uint8_t* begin,
                                                   uint8_t* end) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compute post-compact address of an object in moving space. This function
  // assumes that old_ref is in moving space.
  ALWAYS_INLINE mirror::Object* PostCompactAddressUnchecked(mirror::Object* old_ref) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compute the new address for an object which was allocated prior to starting
  // this GC cycle.
  ALWAYS_INLINE mirror::Object* PostCompactOldObjAddr(mirror::Object* old_ref) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compute the new address for an object which was black allocated during this
  // GC cycle.
  ALWAYS_INLINE mirror::Object* PostCompactBlackObjAddr(mirror::Object* old_ref) const
      REQUIRES_SHARED(Locks::mutator_lock_);
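  // Illustrative example (made-up numbers): with black_allocations_begin_ at
  // 0x70080000 and post_compact_end_ at 0x70060000, black_objs_slide_diff_
  // (declared below) is 0x20000, so a black-allocated object at 0x70081000
  // would slide down to 0x70061000.
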
  // Clears (for alloc spaces in the beginning of marking phase) or ages the
  // card table. Also, identifies immune spaces and mark bitmap.
  void PrepareCardTableForMarking(bool clear_alloc_space_cards)
      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);

  // Perform one last round of marking, identifying roots from dirty cards
  // during a stop-the-world (STW) pause.
  void MarkingPause() REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_);
  // Perform stop-the-world pause prior to concurrent compaction.
  // Updates GC-roots and protects heap so that during the concurrent
  // compaction phase we can receive faults and compact the corresponding pages
  // on the fly.
  void CompactionPause() REQUIRES(Locks::mutator_lock_);
  // Compute offsets (in chunk_info_vec_) and other data structures required
  // during concurrent compaction.
  void PrepareForCompaction() REQUIRES_SHARED(Locks::mutator_lock_);

  // Copy gPageSize live bytes starting from 'offset' (within the moving space),
  // which must be within 'obj', into the gPageSize sized memory pointed by 'addr'.
  // Then update the references within the copied objects. The boundary objects are
  // partially updated such that only the references that lie in the page are updated.
  // This is necessary to avoid cascading userfaults.
  void CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* addr, bool needs_memset_zero)
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compact the bump-pointer space. Pass page that should be used as buffer for
  // userfaultfd.
  template <int kMode>
  void CompactMovingSpace(uint8_t* page) REQUIRES_SHARED(Locks::mutator_lock_);

  // Compact the given page as per func and change its state. Also map/copy the
  // page, if required. Returns true if the page was compacted, else false.
  template <int kMode, typename CompactionFn>
  ALWAYS_INLINE bool DoPageCompactionWithStateChange(size_t page_idx,
                                                     uint8_t* to_space_page,
                                                     uint8_t* page,
                                                     bool map_immediately,
                                                     CompactionFn func)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Update all the objects in the given non-moving space page. 'first' object
  // could have started in some preceding page.
  void UpdateNonMovingPage(mirror::Object* first, uint8_t* page)
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Update all the references in the non-moving space.
  void UpdateNonMovingSpace() REQUIRES_SHARED(Locks::mutator_lock_);

  // For all the pages in non-moving space, find the first object that overlaps
  // with the pages' start address, and store in first_objs_non_moving_space_ array.
  void InitNonMovingSpaceFirstObjects() REQUIRES_SHARED(Locks::mutator_lock_);
  // In addition to the first-objects for every post-compact moving space page,
  // also find offsets within those objects from where the contents should be
  // copied to the page. The offsets are relative to the moving-space's
  // beginning. Store the computed first-object and offset in first_objs_moving_space_
  // and pre_compact_offset_moving_space_ respectively.
  void InitMovingSpaceFirstObjects(const size_t vec_len) REQUIRES_SHARED(Locks::mutator_lock_);

  // Gather the info related to black allocations from bump-pointer space to
  // enable concurrent sliding of these pages.
  void UpdateMovingSpaceBlackAllocations() REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
  // Update first-object info from allocation-stack for non-moving space black
  // allocations.
  void UpdateNonMovingSpaceBlackAllocations() REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);

  // Slides (retaining the empty holes, which are usually part of some in-use
  // TLAB) a black page in the moving space. 'first_obj' is the object that
  // overlaps with the first byte of the page being slid. pre_compact_page is
  // the pre-compact address of the page being slid. 'dest' is the gPageSize
  // sized memory where the contents would be copied.
  void SlideBlackPage(mirror::Object* first_obj,
                      mirror::Object* next_page_first_obj,
                      uint32_t first_chunk_size,
                      uint8_t* const pre_compact_page,
                      uint8_t* dest,
                      bool needs_memset_zero) REQUIRES_SHARED(Locks::mutator_lock_);

  // Perform reference-processing and the likes before sweeping the non-movable
  // spaces.
  void ReclaimPhase() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_);

  // Mark GC-roots (except from immune spaces and thread-stacks) during a STW pause.
  void ReMarkRoots(Runtime* runtime) REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
  // Concurrently mark GC-roots, except from immune spaces.
  void MarkRoots(VisitRootFlags flags) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Collect thread stack roots via a checkpoint.
  void MarkRootsCheckpoint(Thread* self, Runtime* runtime) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Second round of concurrent marking. Mark all gray objects that got dirtied
  // since the first round.
  void PreCleanCards() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);

  void MarkNonThreadRoots(Runtime* runtime) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime)
      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);

  // Traverse through the reachable objects and mark them.
  void MarkReachableObjects() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Scan (only) immune spaces looking for references into the garbage collected
  // spaces.
  void UpdateAndMarkModUnion() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Scan mod-union and card tables, covering all the spaces, to identify dirty objects.
  // These are in 'minimum age' cards, which is 'kCardAged' in case of concurrent (second round)
  // marking and kCardDirty during the STW pause.
  void ScanDirtyObjects(bool paused, uint8_t minimum_age) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Recursively mark dirty objects. Invoked both concurrently as well as in a
  // STW pause in PausePhase().
  void RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Go through all the objects in the mark-stack until it's empty.
  void ProcessMarkStack() override REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void ExpandMarkStack() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  // Scan object for references. If kUpdateLiveWords is true then set bits in
  // the live-words bitmap and add size to chunk-info.
  template <bool kUpdateLiveWords>
  void ScanObject(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Push objects to the mark-stack right after successfully marking objects.
  void PushOnMarkStack(mirror::Object* obj)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  // Update the live-words bitmap as well as add the object size to the
  // chunk-info vector. Both are required for computation of post-compact addresses.
  // Also updates freed_objects_ counter.
  void UpdateLivenessInfo(mirror::Object* obj, size_t obj_size)
      REQUIRES_SHARED(Locks::mutator_lock_);

  void ProcessReferences(Thread* self)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(!Locks::heap_bitmap_lock_);

  void MarkObjectNonNull(mirror::Object* obj,
                         mirror::Object* holder = nullptr,
                         MemberOffset offset = MemberOffset(0))
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void MarkObject(mirror::Object* obj, mirror::Object* holder, MemberOffset offset)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  template <bool kParallel>
  bool MarkObjectNonNullNoPush(mirror::Object* obj,
                               mirror::Object* holder = nullptr,
                               MemberOffset offset = MemberOffset(0))
      REQUIRES(Locks::heap_bitmap_lock_)
      REQUIRES_SHARED(Locks::mutator_lock_);

  void Sweep(bool swap_bitmaps) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void SweepLargeObjects(bool swap_bitmaps) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  // Perform all kernel operations required for concurrent compaction. Includes
  // mremap to move pre-compact pages to from-space, followed by userfaultfd
  // registration on the moving space and linear-alloc.
  void KernelPreparation();
  // Called by KernelPreparation() for every memory range being prepared for
  // userfaultfd registration.
  void KernelPrepareRangeForUffd(uint8_t* to_addr,
                                 uint8_t* from_addr,
                                 size_t map_size,
                                 int fd,
                                 uint8_t* shadow_addr = nullptr);

  void RegisterUffd(void* addr, size_t size, int mode);
  void UnregisterUffd(uint8_t* start, size_t len);

  // Called by thread-pool workers to read uffd_ and process fault events.
  template <int kMode>
  void ConcurrentCompaction(uint8_t* buf) REQUIRES_SHARED(Locks::mutator_lock_);
  // Called by thread-pool workers to compact and copy/map the fault page in
  // moving space.
  template <int kMode>
  void ConcurrentlyProcessMovingPage(uint8_t* fault_page,
                                     uint8_t* buf,
                                     size_t nr_moving_space_used_pages)
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Called by thread-pool workers to process and copy/map the fault page in
  // linear-alloc.
  template <int kMode>
  void ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Process concurrently all the pages in linear-alloc. Called by gc-thread.
  void ProcessLinearAlloc() REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns true if the moving space can be compacted using uffd's minor-fault
  // feature.
  bool CanCompactMovingSpaceWithMinorFault();

  // Does the following:
  // 1. Checks the status of to-space pages in [cur_page_idx,
  //    last_checked_reclaim_page_idx_) range to see whether the corresponding
  //    from-space pages can be reused.
  // 2. Taking into consideration classes which are allocated after their
  //    objects (in address order), computes the page (in from-space) from which
  //    actual reclamation can be done.
  // 3. Map the pages in [cur_page_idx, end_idx_for_mapping) range.
  // 4. Madvise the pages in [page from (2), last_reclaimed_page_)
  bool FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_idx_for_mapping)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Maps processed pages (from moving space and linear-alloc) for uffd's
  // minor-fault feature. We try to 'claim' all processed (and unmapped) pages
  // contiguous to 'to_space_start'.
  // kFirstPageMapping indicates if the first page is already claimed or not. It
  // also indicates that the ioctl must succeed in mapping the first page.
  template <bool kFirstPageMapping>
  void MapProcessedPages(uint8_t* to_space_start,
                         Atomic<PageState>* state_arr,
                         size_t arr_idx,
                         size_t arr_len) REQUIRES_SHARED(Locks::mutator_lock_);

  // Maps moving space pages in [start_idx, arr_len) range. It fetches the page
  // address containing the compacted content from moving_pages_status_ array.
  // 'from_fault' is true when called from userfault (sigbus handler).
  // 'return_on_contention' is set to true by gc-thread while it is compacting
  // pages. In the end it calls the function with `return_on_contention=false`
  // to ensure all pages are mapped. Returns number of pages that are mapped.
  size_t MapMovingSpacePages(size_t start_idx,
                             size_t arr_len,
                             bool from_fault,
                             bool return_on_contention) REQUIRES_SHARED(Locks::mutator_lock_);

  bool IsValidFd(int fd) const { return fd >= 0; }

  PageState GetPageStateFromWord(uint32_t page_word) {
    return static_cast<PageState>(static_cast<uint8_t>(page_word));
  }

  PageState GetMovingPageState(size_t idx) {
    return GetPageStateFromWord(moving_pages_status_[idx].load(std::memory_order_acquire));
  }
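  // Illustrative decode of a status word from moving_pages_status_ (declared
  // below): the least-significant byte holds the PageState; for kProcessed
  // entries the remaining bits carry additional data, e.g. the from-space
  // offset of the compacted contents.
  //   uint32_t word = moving_pages_status_[idx].load(std::memory_order_acquire);
  //   PageState state = static_cast<PageState>(word & kPageStateMask);
  //   uint32_t extra = word >> BitSizeOf<uint8_t>();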

  // Add/update <class, obj> pair if class > obj and obj is the lowest address
  // object of class.
  ALWAYS_INLINE void UpdateClassAfterObjectMap(mirror::Object* obj)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Updates 'class_after_obj_map_' map by updating the keys (class) with its
  // highest-address super-class (obtained from 'super_class_after_class_map_'),
  // if there is any. This is to ensure we don't free from-space pages before
  // the lowest-address obj is compacted.
  void UpdateClassAfterObjMap();

  void MarkZygoteLargeObjects() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  // Map zero-pages in the given range. 'tolerate_eexist' and 'tolerate_enoent'
  // help us decide if we should expect EEXIST or ENOENT back from the ioctl
  // respectively. It may return after mapping fewer pages than requested, for
  // instance if the mmap_lock is found to be contended, in which case we delay
  // the operation based on the thread's priority.
  // Returns number of bytes (multiple of page-size) now known to be mapped.
  size_t ZeropageIoctl(void* addr, size_t length, bool tolerate_eexist, bool tolerate_enoent);
  // Map 'buffer' to 'dst', both being 'length' bytes, using at most one ioctl
  // call. 'return_on_contention' indicates that the function should return
  // as soon as mmap_lock contention is detected. Like ZeropageIoctl(), this
  // function also uses thread's priority to decide how long we delay before
  // forcing the ioctl operation. If the ioctl returns EEXIST, then the function
  // also returns. Returns number of bytes (multiple of page-size) mapped.
  size_t CopyIoctl(void* dst, void* buffer, size_t length, bool return_on_contention);
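  // For reference, minimal sketches of the kernel ioctls that the two wrappers
  // above are built on (illustrative; retry and error handling omitted):
  //   struct uffdio_zeropage zp{};                 // ZeropageIoctl()
  //   zp.range = {reinterpret_cast<uintptr_t>(addr), length};
  //   ioctl(uffd_, UFFDIO_ZEROPAGE, &zp);
  //   struct uffdio_copy copy{};                   // CopyIoctl()
  //   copy.dst = reinterpret_cast<uintptr_t>(dst);
  //   copy.src = reinterpret_cast<uintptr_t>(buffer);
  //   copy.len = length;
  //   ioctl(uffd_, UFFDIO_COPY, &copy);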

  // Called after updating linear-alloc page(s) to map the page. It first
  // updates the state of the pages to kProcessedAndMapping and after ioctl to
  // kProcessedAndMapped. Returns true if at least the first page is now mapped.
  // If 'free_pages' is true then also frees shadow pages. If 'single_ioctl'
  // is true, then stops after first ioctl.
  bool MapUpdatedLinearAllocPages(uint8_t* start_page,
                                  uint8_t* start_shadow_page,
                                  Atomic<PageState>* state,
                                  size_t length,
                                  bool free_pages,
                                  bool single_ioctl);
  // Called for clamping of 'info_map_' and other GC data structures, which are
  // small and/or in >4GB address space. There is no real benefit of clamping
  // them synchronously during app forking. It clamps only if clamp_info_map_status_
  // is set to kClampInfoPending, which is done by ClampGrowthLimit().
  void MaybeClampGcStructures() REQUIRES(Locks::heap_bitmap_lock_);

  size_t ComputeInfoMapSize();
  // Initialize all the info-map related fields of this GC. Returns total size
  // of all the structures in info-map.
  size_t InitializeInfoMap(uint8_t* p, size_t moving_space_sz);
  // Update class-table classes in compaction pause if we are running in debuggable
  // mode. Only visit class-table in image spaces if 'immune_class_table_only'
  // is true.
  void UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // For checkpoints
  Barrier gc_barrier_;
  // Every object inside the immune spaces is assumed to be marked.
  ImmuneSpaces immune_spaces_;
  // Required only when mark-stack is accessed in shared mode, which happens
  // when collecting thread-stack roots using checkpoint. Otherwise, we use it
  // to synchronize on updated_roots_ in debug-builds.
  Mutex lock_;
  accounting::ObjectStack* mark_stack_;
  // Special bitmap wherein all the bits corresponding to an object are set.
  // TODO: make LiveWordsBitmap encapsulated in this class rather than a
  // pointer. We tend to access its members in performance-sensitive
  // code-path. Also, use a single MemMap for all the GC's data structures,
  // which we will clear in the end. This would help in limiting the number of
  // VMAs that get created in the kernel.
  std::unique_ptr<LiveWordsBitmap<kAlignment>> live_words_bitmap_;
  // Track GC-roots updated so far in a GC-cycle. This is to confirm that no
  // GC-root is updated twice.
  // TODO: Must be replaced with an efficient mechanism eventually. Or ensure
  // that double updates don't happen in the first place.
  std::unique_ptr<std::unordered_set<void*>> updated_roots_ GUARDED_BY(lock_);
  MemMap from_space_map_;
  MemMap shadow_to_space_map_;
  // An array of live-bytes in logical chunks of kOffsetChunkSize size
  // in the 'to-be-compacted' space.
  MemMap info_map_;
  // Set of page-sized buffers used for compaction. The first page is used by
  // the GC thread. Subsequent pages are used by mutator threads in case of
  // SIGBUS feature, and by uffd-worker threads otherwise. In the latter case
  // the first page is also used for termination of concurrent compaction by
  // making worker threads terminate the userfaultfd read loop.
  MemMap compaction_buffers_map_;

  class LessByArenaAddr {
   public:
    bool operator()(const TrackedArena* a, const TrackedArena* b) const {
      return std::less<uint8_t*>{}(a->Begin(), b->Begin());
    }
  };

  // Map of arenas allocated in LinearAlloc arena-pool and last non-zero page,
  // captured during compaction pause for concurrent updates.
  std::map<const TrackedArena*, uint8_t*, LessByArenaAddr> linear_alloc_arenas_;
  // Set of PageStatus arrays, one per arena-pool space. It's extremely rare to
  // have more than one, but this is to be ready for the worst case.
  class LinearAllocSpaceData {
   public:
    LinearAllocSpaceData(MemMap&& shadow,
                         MemMap&& page_status_map,
                         uint8_t* begin,
                         uint8_t* end,
                         bool already_shared)
        : shadow_(std::move(shadow)),
          page_status_map_(std::move(page_status_map)),
          begin_(begin),
          end_(end),
          already_shared_(already_shared) {}

    MemMap shadow_;
    MemMap page_status_map_;
    uint8_t* begin_;
    uint8_t* end_;
    // Indicates if the linear-alloc is already MAP_SHARED.
    bool already_shared_;
  };

  std::vector<LinearAllocSpaceData> linear_alloc_spaces_data_;

  class ObjReferenceHash {
   public:
    uint32_t operator()(const ObjReference& ref) const {
      return ref.AsVRegValue() >> kObjectAlignmentShift;
    }
  };

  class ObjReferenceEqualFn {
   public:
    bool operator()(const ObjReference& a, const ObjReference& b) const {
      return a.AsMirrorPtr() == b.AsMirrorPtr();
    }
  };

  class LessByObjReference {
   public:
    bool operator()(const ObjReference& a, const ObjReference& b) const {
      return std::less<mirror::Object*>{}(a.AsMirrorPtr(), b.AsMirrorPtr());
    }
  };

  // Data structures used to track objects whose layout information is stored in later
  // allocated classes (at higher addresses). We must be careful not to free the
  // corresponding from-space pages prematurely.
  using ObjObjOrderedMap = std::map<ObjReference, ObjReference, LessByObjReference>;
  using ObjObjUnorderedMap =
      std::unordered_map<ObjReference, ObjReference, ObjReferenceHash, ObjReferenceEqualFn>;
  // Unordered map of <K, S> such that the class K (in moving space) has kClassWalkSuper
  // in reference bitmap and S is its highest address super class.
  ObjObjUnorderedMap super_class_after_class_hash_map_;
  // Unordered map of <K, V> such that the class K (in moving space) is after its objects
  // or would require iterating super-class hierarchy when visiting references. And V is
  // its lowest address object (in moving space).
  ObjObjUnorderedMap class_after_obj_hash_map_;
  // Ordered map constructed before starting compaction using the above two maps. Key is a
  // class (or super-class) which is higher in address order than some of its object(s) and
  // value is the corresponding object with lowest address.
  ObjObjOrderedMap class_after_obj_ordered_map_;
  // Since the compaction is done in reverse, we use a reverse iterator. It is maintained
  // either at the pair whose class is lower than the first page to be freed, or at the
  // pair whose object is not yet compacted.
  ObjObjOrderedMap::const_reverse_iterator class_after_obj_iter_;
  // Cached reference to the last class which has kClassWalkSuper in reference
  // bitmap but has all its super classes at lower addresses than itself.
  mirror::Class* walk_super_class_cache_;
  // Used by FreeFromSpacePages() for maintaining markers in the moving space for
  // how far the pages have been reclaimed (madvised) and checked.
  //
  // Pages from this index to the end of to-space have been checked (via page_status)
  // and their corresponding from-space pages are reclaimable.
  size_t last_checked_reclaim_page_idx_;
  // All from-space pages in [last_reclaimed_page_, from_space->End()) are
  // reclaimed (madvised). Pages in [from-space page corresponding to
  // last_checked_reclaim_page_idx_, last_reclaimed_page_) are not reclaimed as
  // they may contain classes required for class hierarchy traversal for
  // visiting references during compaction.
  uint8_t* last_reclaimed_page_;
  // All the pages in [last_reclaimable_page_, last_reclaimed_page_) in
  // from-space are available to store compacted contents for batching until the
  // next time madvise is called.
  uint8_t* last_reclaimable_page_;
  // [cur_reclaimable_page_, last_reclaimed_page_) have been used to store
  // compacted contents for batching.
  uint8_t* cur_reclaimable_page_;

  space::ContinuousSpace* non_moving_space_;
  space::BumpPointerSpace* const bump_pointer_space_;
  // The main space bitmap
  accounting::ContinuousSpaceBitmap* const moving_space_bitmap_;
  accounting::ContinuousSpaceBitmap* non_moving_space_bitmap_;
  Thread* thread_running_gc_;
  // Array of moving-space's pages' compaction status, which is stored in the
  // least-significant byte. kProcessed entries also contain the from-space
  // offset of the page which contains the compacted contents of the ith
  // to-space page.
  Atomic<uint32_t>* moving_pages_status_;
  size_t vector_length_;
  size_t live_stack_freeze_size_;

  uint64_t bytes_scanned_;

  // For every page in the to-space (post-compact heap) we need to know the
  // first object from which we must compact and/or update references. This is
  // for both non-moving and moving space. Additionally, for the moving-space,
  // we also need the offset within the object from where we need to start
  // copying.
  // chunk_info_vec_ holds live bytes for chunks during marking phase. After
  // marking we perform an exclusive scan to compute offset for every chunk.
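  // For example (illustrative): per-chunk live-byte counts {128, 0, 512, 64}
  // turn into start offsets {0, 128, 128, 640} after the exclusive scan.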
  uint32_t* chunk_info_vec_;
  // For pages before black allocations, pre_compact_offset_moving_space_[i]
  // holds offset within the space from where the objects need to be copied in
  // the ith post-compact page.
  // Otherwise, black_alloc_pages_first_chunk_size_[i] holds the size of first
  // non-empty chunk in the ith black-allocations page.
  union {
    uint32_t* pre_compact_offset_moving_space_;
    uint32_t* black_alloc_pages_first_chunk_size_;
  };
  // first_objs_moving_space_[i] is the pre-compact address of the object which
  // would overlap with the starting boundary of the ith post-compact page.
  ObjReference* first_objs_moving_space_;
  // First object for every page. It could be greater than the page's start
  // address, or null if the page is empty.
  ObjReference* first_objs_non_moving_space_;
  size_t non_moving_first_objs_count_;
  // Length of first_objs_moving_space_ and pre_compact_offset_moving_space_
  // arrays. Also the number of pages which are to be compacted.
  size_t moving_first_objs_count_;
  // Number of pages containing black-allocated objects, indicating number of
  // pages to be slid.
  size_t black_page_count_;

  uint8_t* from_space_begin_;
  // Cached values of moving-space range to optimize checking if reference
  // belongs to moving-space or not. May get updated if and when heap is
  // clamped.
  uint8_t* const moving_space_begin_;
  uint8_t* moving_space_end_;
  // moving-space's end pointer at the marking pause. All allocations beyond
  // this will be considered black in the current GC cycle. Aligned up to page
  // size.
  uint8_t* black_allocations_begin_;
  // End of compacted space. Use for computing post-compact addr of black
  // allocated objects. Aligned up to page size.
  uint8_t* post_compact_end_;
  // Cache (black_allocations_begin_ - post_compact_end_) for post-compact
  // address computations.
  ptrdiff_t black_objs_slide_diff_;
  // Cache (from_space_begin_ - bump_pointer_space_->Begin()) so that we can
  // compute the from-space address of a given pre-compact addr efficiently.
  ptrdiff_t from_space_slide_diff_;

  // TODO: Remove once an efficient mechanism to deal with double root updates
  // is incorporated.
  void* stack_high_addr_;
  void* stack_low_addr_;

  uint8_t* conc_compaction_termination_page_;

  PointerSize pointer_size_;
  // Number of objects freed during this GC in moving space. It is decremented
  // every time an object is discovered, and the total object count is added to
  // it in MarkingPause(). It reaches the correct count only once the marking
  // phase is completed.
  int32_t freed_objects_;
  // memfds for moving space for using userfaultfd's minor-fault feature.
  // Initialized to kFdUnused to indicate that mmap should be MAP_PRIVATE in
  // KernelPrepareRange().
  int moving_to_space_fd_;
  int moving_from_space_fd_;
  // Userfault file descriptor, accessed only by the GC itself.
  // kFallbackMode value indicates that we are in the fallback mode.
  int uffd_;
  // Number of mutator-threads currently executing SIGBUS handler. When the
  // GC-thread is done with compaction, it sets the most significant bit to
  // indicate that. Mutator threads check for the flag when incrementing in the
  // handler.
  std::atomic<SigbusCounterType> sigbus_in_progress_count_;
  // Number of mutator-threads/uffd-workers working on a moving-space page. It
  // must be 0 before the gc-thread can unregister the space after it's done
  // sequentially compacting all pages of the space.
  std::atomic<uint16_t> compaction_in_progress_count_;
  // When using SIGBUS feature, this counter is used by mutators to claim a page
  // out of compaction buffers to be used for the entire compaction cycle.
  std::atomic<uint16_t> compaction_buffer_counter_;
  // Used to exit from the compaction loop at the end of concurrent compaction.
  uint8_t thread_pool_counter_;
  // True while compacting.
  bool compacting_;
  // Flag indicating whether one-time uffd initialization has been done. It will
  // be false on the first GC for non-zygote processes, and always for zygote.
  // Its purpose is to keep the userfaultfd overhead to a minimum in
  // Heap::PostForkChildAction() as it's invoked in the app startup path. With
  // this, we register the compaction-termination page on the first GC.
  bool uffd_initialized_;
  // Flag indicating if userfaultfd supports minor-faults. Set appropriately in
  // CreateUserfaultfd(), where we get this information from the kernel.
  const bool uffd_minor_fault_supported_;
  // Flag indicating if we should use sigbus signals instead of threads to
  // handle userfaults.
  const bool use_uffd_sigbus_;
  // For non-zygote processes this flag indicates if the spaces are ready to
  // start using userfaultfd's minor-fault feature. This initialization involves
  // starting to use shmem (memfd_create) for the userfaultfd protected spaces.
  bool minor_fault_initialized_;
  // Set to true when linear-alloc can start mapping with MAP_SHARED. Set on
  // non-zygote processes during first GC, which sets up everything for using
  // minor-fault from next GC.
  bool map_linear_alloc_shared_;
  // Clamping status of `info_map_`. Initialized with 'NotDone'. Once the heap is
  // clamped but info_map_ is delayed, we set it to 'Pending'. Once 'info_map_'
  // is also clamped, then we set it to 'Finished'.
  ClampInfoStatus clamp_info_map_status_;

  class FlipCallback;
  class ThreadFlipVisitor;
  class VerifyRootMarkedVisitor;
  class ScanObjectVisitor;
  class CheckpointMarkThreadRoots;
  template <size_t kBufferSize>
  class ThreadRootsVisitor;
  class RefFieldsVisitor;
  template <bool kCheckBegin, bool kCheckEnd> class RefsUpdateVisitor;
  class ArenaPoolPageUpdater;
  class ClassLoaderRootsUpdater;
  class LinearAllocPageUpdater;
  class ImmuneSpaceUpdateObjVisitor;
  class ConcurrentCompactionGcTask;

  DISALLOW_IMPLICIT_CONSTRUCTORS(MarkCompact);
};

std::ostream& operator<<(std::ostream& os, MarkCompact::PageState value);
std::ostream& operator<<(std::ostream& os, MarkCompact::ClampInfoStatus value);

}  // namespace collector
}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_