1 /*
2  * Copyright 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 // Glibc v2.19 doesn't include these in fcntl.h, so host builds will fail without this include.
19 #if !defined(FALLOC_FL_PUNCH_HOLE) || !defined(FALLOC_FL_KEEP_SIZE)
20 #include <linux/falloc.h>
21 #endif
22 #include <linux/userfaultfd.h>
23 #include <poll.h>
24 #include <sys/ioctl.h>
25 #include <sys/mman.h>
26 #include <sys/resource.h>
27 #include <sys/stat.h>
28 #include <sys/syscall.h>
29 #include <unistd.h>
30 
31 #include <fstream>
32 #include <numeric>
33 #include <string>
34 #include <string_view>
35 #include <vector>
36 
37 #include "android-base/file.h"
38 #include "android-base/parsebool.h"
39 #include "android-base/parseint.h"
40 #include "android-base/properties.h"
41 #include "android-base/strings.h"
42 #include "base/file_utils.h"
43 #include "base/memfd.h"
44 #include "base/quasi_atomic.h"
45 #include "base/systrace.h"
46 #include "base/utils.h"
47 #include "gc/accounting/mod_union_table-inl.h"
48 #include "gc/collector_type.h"
49 #include "gc/reference_processor.h"
50 #include "gc/space/bump_pointer_space.h"
51 #include "gc/task_processor.h"
52 #include "gc/verification-inl.h"
53 #include "jit/jit_code_cache.h"
54 #include "mark_compact-inl.h"
55 #include "mirror/object-refvisitor-inl.h"
56 #include "read_barrier_config.h"
57 #include "scoped_thread_state_change-inl.h"
58 #include "sigchain.h"
59 #include "thread_list.h"
60 
61 #ifdef ART_TARGET_ANDROID
62 #include "android-modules-utils/sdk_level.h"
63 #include "com_android_art.h"
64 #endif
65 
66 #ifndef __BIONIC__
67 #ifndef MREMAP_DONTUNMAP
68 #define MREMAP_DONTUNMAP 4
69 #endif
70 #ifndef MAP_FIXED_NOREPLACE
71 #define MAP_FIXED_NOREPLACE 0x100000
72 #endif
73 #endif  // __BIONIC__
74 
75 // See aosp/2996596 for where these values came from.
76 #ifndef UFFDIO_COPY_MODE_MMAP_TRYLOCK
77 #define UFFDIO_COPY_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
78 #endif
79 #ifndef UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK
80 #define UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
81 #endif
82 #ifdef ART_TARGET_ANDROID
83 namespace {
84 
85 using ::android::base::GetBoolProperty;
86 using ::android::base::ParseBool;
87 using ::android::base::ParseBoolResult;
88 using ::android::modules::sdklevel::IsAtLeastV;
89 
90 }  // namespace
91 #endif
92 
93 namespace art HIDDEN {
94 
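// Runtime probe for MREMAP_DONTUNMAP: map one anonymous page, attempt an mremap
// with MREMAP_MAYMOVE | MREMAP_DONTUNMAP, and treat anything other than
// MAP_FAILED as kernel support. The probe mappings are released before returning.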
95 static bool HaveMremapDontunmap() {
96   const size_t page_size = GetPageSizeSlow();
97   void* old = mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
98   CHECK_NE(old, MAP_FAILED);
99   void* addr = mremap(old, page_size, page_size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr);
100   CHECK_EQ(munmap(old, page_size), 0);
101   if (addr != MAP_FAILED) {
102     CHECK_EQ(munmap(addr, page_size), 0);
103     return true;
104   } else {
105     return false;
106   }
107 }
108 
109 static bool gUffdSupportsMmapTrylock = false;
110 // We require the MREMAP_DONTUNMAP functionality of the mremap syscall, which was
111 // introduced in kernel version 5.13 but has been backported to GKI kernels.
112 static bool gHaveMremapDontunmap = IsKernelVersionAtLeast(5, 13) || HaveMremapDontunmap();
113 // Bitmap of features supported by userfaultfd. This is obtained via uffd API ioctl.
114 static uint64_t gUffdFeatures = 0;
115 // Both missing and minor faults on shmem are needed only for minor-fault mode.
116 static constexpr uint64_t kUffdFeaturesForMinorFault =
117     UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM;
118 static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS;
119 
120 // We consider the SIGBUS feature necessary to enable this GC, as it is superior
121 // to the threading-based implementation with respect to jank. However, since we
122 // already have the latter implemented, for testing purposes we allow choosing
123 // either of the two at boot time in the constructor below.
124 // We may want the minor-fault feature in the future for making jit-code-cache
125 // updates concurrent, as the code cache uses shmem.
126 bool KernelSupportsUffd() {
127 #ifdef __linux__
128   if (gHaveMremapDontunmap) {
129     int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
130     // On non-android devices we may not have the kernel patches that restrict
131     // userfaultfd to user mode. But that is not a security concern as we are
132     // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
133     if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) {
134       fd = syscall(__NR_userfaultfd, O_CLOEXEC);
135     }
136     if (fd >= 0) {
137       // We are only fetching the available features, which is returned by the
138       // ioctl.
139       struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
140       CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno);
141       gUffdFeatures = api.features;
142       // MMAP_TRYLOCK is available only in 5.10 and 5.15 GKI kernels. The higher
143       // versions will have per-vma locks. The lower ones don't support
144       // userfaultfd.
145       if (kIsTargetAndroid && !IsKernelVersionAtLeast(5, 16)) {
146         // Check if MMAP_TRYLOCK feature is supported
147         const size_t page_size = GetPageSizeSlow();
148         void* mem =
149             mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
150         CHECK_NE(mem, MAP_FAILED) << " errno: " << errno;
151 
152         struct uffdio_zeropage uffd_zeropage;
153         uffd_zeropage.mode = UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK;
154         uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(mem);
155         uffd_zeropage.range.len = page_size;
156         uffd_zeropage.zeropage = 0;
157         // The ioctl will definitely fail as mem is not registered with uffd.
158         CHECK_EQ(ioctl(fd, UFFDIO_ZEROPAGE, &uffd_zeropage), -1);
159         // uffd ioctls return EINVAL for several reasons. By ensuring proper
160         // alignment of 'mem' and 'len', we make sure that, before the kernel
161         // updates uffd_zeropage.zeropage (with an error), the ioctl can fail
162         // with EINVAL only if `trylock` isn't available.
163         if (uffd_zeropage.zeropage == 0 && errno == EINVAL) {
164           LOG(INFO) << "MMAP_TRYLOCK is not supported in uffd addr:" << mem
165                     << " page-size:" << page_size;
166         } else {
167           gUffdSupportsMmapTrylock = true;
168           LOG(INFO) << "MMAP_TRYLOCK is supported in uffd errno:" << errno << " addr:" << mem
169                     << " size:" << page_size;
170         }
171         munmap(mem, page_size);
172       }
173       close(fd);
174       // The minimum we need is the SIGBUS feature for using userfaultfd.
175       return (api.features & kUffdFeaturesForSigbus) == kUffdFeaturesForSigbus;
176     }
177   }
178 #endif
179   return false;
180 }
181 
182 // The other cases are defined as constexpr in runtime/read_barrier_config.h
183 #if !defined(ART_FORCE_USE_READ_BARRIER) && defined(ART_USE_READ_BARRIER)
184 // Returns collector type asked to be used on the cmdline.
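// For example, a cmdline containing "-Xgc:CMC" selects the concurrent mark-compact
// collector, while "-Xgc:CC" selects concurrent copying.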
185 static gc::CollectorType FetchCmdlineGcType() {
186   std::string argv;
187   gc::CollectorType gc_type = gc::CollectorType::kCollectorTypeNone;
188   if (android::base::ReadFileToString("/proc/self/cmdline", &argv)) {
189     auto pos = argv.rfind("-Xgc:");
190     if (argv.substr(pos + 5, 3) == "CMC") {
191       gc_type = gc::CollectorType::kCollectorTypeCMC;
192     } else if (argv.substr(pos + 5, 2) == "CC") {
193       gc_type = gc::CollectorType::kCollectorTypeCC;
194     }
195   }
196   return gc_type;
197 }
198 
199 #ifdef ART_TARGET_ANDROID
200 static int GetOverrideCacheInfoFd() {
201   std::string args_str;
202   if (!android::base::ReadFileToString("/proc/self/cmdline", &args_str)) {
203     LOG(WARNING) << "Failed to load /proc/self/cmdline";
204     return -1;
205   }
206   std::vector<std::string_view> args;
207   Split(std::string_view(args_str), /*separator=*/'\0', &args);
208   for (std::string_view arg : args) {
209     if (android::base::ConsumePrefix(&arg, "--cache-info-fd=")) {  // This is a dex2oat flag.
210       int fd;
211       if (!android::base::ParseInt(std::string(arg), &fd)) {
212         LOG(ERROR) << "Failed to parse --cache-info-fd (value: '" << arg << "')";
213         return -1;
214       }
215       return fd;
216     }
217   }
218   return -1;
219 }
220 
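// Reads the cache-info XML, either from the fd passed via --cache-info-fd or from
// the default path under the ART APEX dalvik-cache, and returns the cached system
// properties it lists as a key/value map. Returns an empty map on any failure.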
221 static std::unordered_map<std::string, std::string> GetCachedProperties() {
222   // For simplicity, we don't handle multiple calls because otherwise we would have to reset the fd.
223   static bool called = false;
224   CHECK(!called) << "GetCachedProperties can be called only once";
225   called = true;
226 
227   std::string cache_info_contents;
228   int fd = GetOverrideCacheInfoFd();
229   if (fd >= 0) {
230     if (!android::base::ReadFdToString(fd, &cache_info_contents)) {
231       PLOG(ERROR) << "Failed to read cache-info from fd " << fd;
232       return {};
233     }
234   } else {
235     std::string path = GetApexDataDalvikCacheDirectory(InstructionSet::kNone) + "/cache-info.xml";
236     if (!android::base::ReadFileToString(path, &cache_info_contents)) {
237       // If the file is not found, then we are in chroot or in a standalone runtime process (e.g.,
238       // IncidentHelper), or odsign/odrefresh failed to generate and sign the cache info. There's
239       // nothing we can do.
240       if (errno != ENOENT) {
241         PLOG(ERROR) << "Failed to read cache-info from the default path";
242       }
243       return {};
244     }
245   }
246 
247   std::optional<com::android::art::CacheInfo> cache_info =
248       com::android::art::parse(cache_info_contents.c_str());
249   if (!cache_info.has_value()) {
250     // This should never happen.
251     LOG(ERROR) << "Failed to parse cache-info";
252     return {};
253   }
254   const com::android::art::KeyValuePairList* list = cache_info->getFirstSystemProperties();
255   if (list == nullptr) {
256     // This should never happen.
257     LOG(ERROR) << "Missing system properties from cache-info";
258     return {};
259   }
260   const std::vector<com::android::art::KeyValuePair>& properties = list->getItem();
261   std::unordered_map<std::string, std::string> result;
262   for (const com::android::art::KeyValuePair& pair : properties) {
263     result[pair.getK()] = pair.getV();
264   }
265   return result;
266 }
267 
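// Looks up 'key' in the map returned by GetCachedProperties() and parses the value
// as a boolean, falling back to 'default_value' if the key is absent or unparsable.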
268 static bool GetCachedBoolProperty(
269     const std::unordered_map<std::string, std::string>& cached_properties,
270     const std::string& key,
271     bool default_value) {
272   auto it = cached_properties.find(key);
273   if (it == cached_properties.end()) {
274     return default_value;
275   }
276   ParseBoolResult result = ParseBool(it->second);
277   switch (result) {
278     case ParseBoolResult::kTrue:
279       return true;
280     case ParseBoolResult::kFalse:
281       return false;
282     case ParseBoolResult::kError:
283       return default_value;
284   }
285 }
286 
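// Decides from boot-time system properties whether the userfaultfd GC should be
// used: enabled by the phenotype flag, the build-time property, or by default on
// Android U and earlier, unless the force-disable phenotype flag is set.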
287 static bool SysPropSaysUffdGc() {
288   // The phenotype flag can change at any time after boot, but it shouldn't take effect until a
289   // reboot. Therefore, we read the phenotype flag from the cache info, which is generated on boot.
290   std::unordered_map<std::string, std::string> cached_properties = GetCachedProperties();
291   bool phenotype_enable = GetCachedBoolProperty(
292       cached_properties, "persist.device_config.runtime_native_boot.enable_uffd_gc_2", false);
293   bool phenotype_force_disable = GetCachedBoolProperty(
294       cached_properties, "persist.device_config.runtime_native_boot.force_disable_uffd_gc", false);
295   bool build_enable = GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false);
296   bool is_at_most_u = !IsAtLeastV();
297   return (phenotype_enable || build_enable || is_at_most_u) && !phenotype_force_disable;
298 }
299 #else
300 // Never called.
301 static bool SysPropSaysUffdGc() { return false; }
302 #endif
303 
304 static bool ShouldUseUserfaultfd() {
305   static_assert(kUseBakerReadBarrier || kUseTableLookupReadBarrier);
306 #ifdef __linux__
307   // Use CMC/CC if explicitly requested on the cmdline. Otherwise, always use CC
308   // on host. On target, use CMC only if the system properties say so and the
309   // kernel supports it.
310   gc::CollectorType gc_type = FetchCmdlineGcType();
311   return gc_type == gc::CollectorType::kCollectorTypeCMC ||
312          (gc_type == gc::CollectorType::kCollectorTypeNone &&
313           kIsTargetAndroid &&
314           SysPropSaysUffdGc() &&
315           KernelSupportsUffd());
316 #else
317   return false;
318 #endif
319 }
320 
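// Evaluated once when libart is loaded; the rest of the runtime consults these
// globals to choose between the userfaultfd-based CMC collector and the
// read-barrier-based CC collector.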
321 const bool gUseUserfaultfd = ShouldUseUserfaultfd();
322 const bool gUseReadBarrier = !gUseUserfaultfd;
323 #endif
324 
325 namespace gc {
326 namespace collector {
327 
328 // Turn off kCheckLocks when profiling the GC as it slows down the GC
329 // significantly.
330 static constexpr bool kCheckLocks = kDebugLocking;
331 static constexpr bool kVerifyRootsMarked = kIsDebugBuild;
332 // Two threads should suffice on devices.
333 static constexpr size_t kMaxNumUffdWorkers = 2;
334 // Number of compaction buffers reserved for mutator threads in SIGBUS feature
335 // case. It's extremely unlikely that we will ever have more than these number
336 // of mutator threads trying to access the moving-space during one compaction
337 // phase.
338 static constexpr size_t kMutatorCompactionBufferCount = 2048;
339 // Minimum from-space chunk to be madvised (during concurrent compaction) in one go.
340 // Choose a reasonable size to avoid making too many batched ioctl and madvise calls.
341 static constexpr ssize_t kMinFromSpaceMadviseSize = 8 * MB;
342 // Concurrent compaction termination logic is different (and slightly more efficient) if the
343 // kernel has the fault-retry feature (allowing repeated faults on the same page), which was
344 // introduced in 5.7 (https://android-review.git.corp.google.com/c/kernel/common/+/1540088).
345 // This allows a single page fault to be handled, in turn, by each worker thread, only waking
346 // up the GC thread at the end.
347 static const bool gKernelHasFaultRetry = IsKernelVersionAtLeast(5, 7);
348 
349 std::pair<bool, bool> MarkCompact::GetUffdAndMinorFault() {
350   bool uffd_available;
351   // In most cases gUffdFeatures will already be initialized at boot time when
352   // libart is loaded. On very old kernels we may get '0' from the kernel, in
353   // which case we would be doing the syscalls each time this function is
354   // called. But that's a very unlikely case. There are no correctness issues,
355   // as the kernel's response never changes after boot.
356   if (UNLIKELY(gUffdFeatures == 0)) {
357     uffd_available = KernelSupportsUffd();
358   } else {
359     // We can have any uffd features only if uffd exists.
360     uffd_available = true;
361   }
362   bool minor_fault_available =
363       (gUffdFeatures & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault;
364   return std::pair<bool, bool>(uffd_available, minor_fault_available);
365 }
366 
367 bool MarkCompact::CreateUserfaultfd(bool post_fork) {
368   if (post_fork || uffd_ == kFdUnused) {
369     // Check if we have MREMAP_DONTUNMAP here for cases where
370     // 'ART_USE_READ_BARRIER=false' is used. Additionally, this check ensures
371     // that userfaultfd isn't used on old kernels, which can cause random ioctl
372     // failures.
373     if (gHaveMremapDontunmap) {
374       // Don't use O_NONBLOCK as we rely on read waiting on uffd_ if there isn't
375       // any read event available. We don't use poll.
376       uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
377       // On non-android devices we may not have the kernel patches that restrict
378       // userfaultfd to user mode. But that is not a security concern as we are
379       // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
380       if (!kIsTargetAndroid && UNLIKELY(uffd_ == -1 && errno == EINVAL)) {
381         uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC);
382       }
383       if (UNLIKELY(uffd_ == -1)) {
384         uffd_ = kFallbackMode;
385         LOG(WARNING) << "Userfaultfd isn't supported (reason: " << strerror(errno)
386                      << ") and therefore falling back to stop-the-world compaction.";
387       } else {
388         DCHECK(IsValidFd(uffd_));
389         // Initialize uffd with the features which are required and available.
390         // Using private anonymous mapping in threading mode is the default,
391         // for which we don't need to ask for any features. Note: this mode
392         // is not used in production.
393         struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
394         if (use_uffd_sigbus_) {
395           // We should add SIGBUS feature only if we plan on using it as
396           // requesting it here will mean threading mode will not work.
397           CHECK_EQ(gUffdFeatures & kUffdFeaturesForSigbus, kUffdFeaturesForSigbus);
398           api.features |= kUffdFeaturesForSigbus;
399         }
400         if (uffd_minor_fault_supported_) {
401           // NOTE: This option is currently disabled.
402           CHECK_EQ(gUffdFeatures & kUffdFeaturesForMinorFault, kUffdFeaturesForMinorFault);
403           api.features |= kUffdFeaturesForMinorFault;
404         }
405         CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0)
406             << "ioctl_userfaultfd: API: " << strerror(errno);
407       }
408     } else {
409       uffd_ = kFallbackMode;
410     }
411   }
412   uffd_initialized_ = !post_fork || uffd_ == kFallbackMode;
413   return IsValidFd(uffd_);
414 }
415 
416 template <size_t kAlignment>
417 MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignment>::Create(
418     uintptr_t begin, uintptr_t end) {
419   return static_cast<LiveWordsBitmap<kAlignment>*>(
420           MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end));
421 }
422 
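// Ensures gUffdFeatures has been populated (GetUffdAndMinorFault() queries the
// kernel if needed) and then checks for the SIGBUS feature bit.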
423 static bool IsSigbusFeatureAvailable() {
424   MarkCompact::GetUffdAndMinorFault();
425   return (gUffdFeatures & kUffdFeaturesForSigbus) == kUffdFeaturesForSigbus;
426 }
427 
428 size_t MarkCompact::ComputeInfoMapSize() {
429   size_t moving_space_size = bump_pointer_space_->Capacity();
430   size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
431   size_t nr_moving_pages = DivideByPageSize(moving_space_size);
432   size_t nr_non_moving_pages = DivideByPageSize(heap_->GetNonMovingSpace()->Capacity());
433   return chunk_info_vec_size * sizeof(uint32_t) + nr_non_moving_pages * sizeof(ObjReference) +
434          nr_moving_pages * (sizeof(ObjReference) + sizeof(uint32_t) + sizeof(Atomic<uint32_t>));
435 }
436 
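// Lays out the individual arrays inside the single info_map_ allocation, in the
// same order and with the same sizes assumed by ComputeInfoMapSize(): chunk-info
// vector, then first-objs, pre-compact offsets, and page-status for the moving
// space, then first-objs for the non-moving space. Returns the total bytes used.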
437 size_t MarkCompact::InitializeInfoMap(uint8_t* p, size_t moving_space_sz) {
438   size_t nr_moving_pages = DivideByPageSize(moving_space_sz);
439 
440   chunk_info_vec_ = reinterpret_cast<uint32_t*>(p);
441   vector_length_ = moving_space_sz / kOffsetChunkSize;
442   size_t total = vector_length_ * sizeof(uint32_t);
443 
444   first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
445   total += nr_moving_pages * sizeof(ObjReference);
446 
447   pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p + total);
448   total += nr_moving_pages * sizeof(uint32_t);
449 
450   moving_pages_status_ = reinterpret_cast<Atomic<uint32_t>*>(p + total);
451   total += nr_moving_pages * sizeof(Atomic<uint32_t>);
452 
453   first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
454   total += DivideByPageSize(heap_->GetNonMovingSpace()->Capacity()) * sizeof(ObjReference);
455   DCHECK_EQ(total, ComputeInfoMapSize());
456   return total;
457 }
458 
459 MarkCompact::MarkCompact(Heap* heap)
460     : GarbageCollector(heap, "concurrent mark compact"),
461       gc_barrier_(0),
462       lock_("mark compact lock", kGenericBottomLock),
463       bump_pointer_space_(heap->GetBumpPointerSpace()),
464       moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()),
465       moving_space_begin_(bump_pointer_space_->Begin()),
466       moving_space_end_(bump_pointer_space_->Limit()),
467       moving_to_space_fd_(kFdUnused),
468       moving_from_space_fd_(kFdUnused),
469       uffd_(kFdUnused),
470       sigbus_in_progress_count_(kSigbusCounterCompactionDoneMask),
471       compaction_in_progress_count_(0),
472       thread_pool_counter_(0),
473       compacting_(false),
474       uffd_initialized_(false),
475       uffd_minor_fault_supported_(false),
476       use_uffd_sigbus_(IsSigbusFeatureAvailable()),
477       minor_fault_initialized_(false),
478       map_linear_alloc_shared_(false),
479       clamp_info_map_status_(ClampInfoStatus::kClampInfoNotDone) {
480   if (kIsDebugBuild) {
481     updated_roots_.reset(new std::unordered_set<void*>());
482   }
483   // TODO: When using minor-fault feature, the first GC after zygote-fork
484   // requires mapping the linear-alloc again with MAP_SHARED. This leaves a
485   // gap for suspended threads to access linear-alloc when it's empty (after
486   // mremap) and not yet userfaultfd registered. This cannot be fixed by merely
487   // doing uffd registration first. For now, just assert that we are not using
488   // minor-fault. Eventually, a cleanup of linear-alloc update logic to only
489   // use private anonymous would be ideal.
490   CHECK(!uffd_minor_fault_supported_);
491   uint8_t* moving_space_begin = bump_pointer_space_->Begin();
492 
493   // TODO: This depends on how the bump-pointer space move is implemented. If we
494   // switch between two virtual memories each time, then we will have to
495   // initialize live_words_bitmap_ accordingly.
496   live_words_bitmap_.reset(LiveWordsBitmap<kAlignment>::Create(
497       reinterpret_cast<uintptr_t>(moving_space_begin),
498       reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit())));
499 
500   std::string err_msg;
501   size_t moving_space_size = bump_pointer_space_->Capacity();
502   {
503     // Create one MemMap for all the data structures
504     info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector",
505                                      ComputeInfoMapSize(),
506                                      PROT_READ | PROT_WRITE,
507                                      /*low_4gb=*/false,
508                                      &err_msg);
509     if (UNLIKELY(!info_map_.IsValid())) {
510       LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg;
511     } else {
512       size_t total = InitializeInfoMap(info_map_.Begin(), moving_space_size);
513       DCHECK_EQ(total, info_map_.Size());
514     }
515   }
516 
517   size_t moving_space_alignment = Heap::BestPageTableAlignment(moving_space_size);
518   // The moving space is created at a fixed address, which is expected to be
519   // PMD-size aligned.
520   if (!IsAlignedParam(moving_space_begin, moving_space_alignment)) {
521     LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment)
522                  << ". This can lead to longer stop-the-world pauses for compaction";
523   }
524   // NOTE: PROT_NONE is used here as these mappings are for address space reservation
525   // only and will be used only after appropriately remapping them.
526   from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space",
527                                                 moving_space_size,
528                                                 PROT_NONE,
529                                                 /*low_4gb=*/kObjPtrPoisoning,
530                                                 moving_space_alignment,
531                                                 &err_msg);
532   if (UNLIKELY(!from_space_map_.IsValid())) {
533     LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space: " << err_msg;
534   } else {
535     from_space_begin_ = from_space_map_.Begin();
536   }
537 
538   // In some cases (32-bit or kObjPtrPoisoning) it's too much to ask for 3
539   // heap-sized mappings in low-4GB. So tolerate failure here by attempting to
540   // mmap again right before the compaction pause. And if even that fails, then
541   // run the GC cycle in copy-mode rather than minor-fault mode.
542   //
543   // This map doesn't have to be aligned to 2MB as we don't mremap on it.
544   if (!kObjPtrPoisoning && uffd_minor_fault_supported_) {
545     // We need this map only if minor-fault feature is supported. But in that case
546     // don't create the mapping if obj-ptr poisoning is enabled as then the mapping
547     // has to be created in low_4gb. Doing this here rather than later causes the
548     // Dex2oatImageTest.TestExtension gtest to fail in 64-bit platforms.
549     shadow_to_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact moving-space shadow",
550                                                 moving_space_size,
551                                                 PROT_NONE,
552                                                 /*low_4gb=*/false,
553                                                 &err_msg);
554     if (!shadow_to_space_map_.IsValid()) {
555       LOG(WARNING) << "Failed to allocate concurrent mark-compact moving-space shadow: " << err_msg;
556     }
557   }
558   const size_t num_pages =
559       1 + (use_uffd_sigbus_ ? kMutatorCompactionBufferCount :
560                               std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers));
561   compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers",
562                                                  gPageSize * num_pages,
563                                                  PROT_READ | PROT_WRITE,
564                                                  /*low_4gb=*/kObjPtrPoisoning,
565                                                  &err_msg);
566   if (UNLIKELY(!compaction_buffers_map_.IsValid())) {
567     LOG(FATAL) << "Failed to allocate concurrent mark-compact compaction buffers: " << err_msg;
568   }
569   // We also use the first page-sized buffer for the purpose of terminating concurrent compaction.
570   conc_compaction_termination_page_ = compaction_buffers_map_.Begin();
571   // Touch the page deliberately to avoid userfaults on it. We madvise it in
572   // CompactionPhase() before using it to terminate concurrent compaction.
573   ForceRead(conc_compaction_termination_page_);
574 
575   // In most cases, we don't expect more than one LinearAlloc space.
576   linear_alloc_spaces_data_.reserve(1);
577 
578   // Ensure that huge-pages are not used on the moving-space, which may happen
579   // if THP is 'always' enabled and breaks our assumption that a normal-page is
580   // mapped when any address is accessed.
581   int ret = madvise(moving_space_begin, moving_space_size, MADV_NOHUGEPAGE);
582   // Some devices may not have THP configured in the kernel. On such devices
583   // madvise will fail with EINVAL. Obviously, on such devices this madvise is
584   // not required in the first place.
585   CHECK(ret == 0 || errno == EINVAL);
586 
587   // Initialize GC metrics.
588   metrics::ArtMetrics* metrics = GetMetrics();
589   // The mark-compact collector supports only full-heap collections at the moment.
590   gc_time_histogram_ = metrics->FullGcCollectionTime();
591   metrics_gc_count_ = metrics->FullGcCount();
592   metrics_gc_count_delta_ = metrics->FullGcCountDelta();
593   gc_throughput_histogram_ = metrics->FullGcThroughput();
594   gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput();
595   gc_throughput_avg_ = metrics->FullGcThroughputAvg();
596   gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg();
597   gc_scanned_bytes_ = metrics->FullGcScannedBytes();
598   gc_scanned_bytes_delta_ = metrics->FullGcScannedBytesDelta();
599   gc_freed_bytes_ = metrics->FullGcFreedBytes();
600   gc_freed_bytes_delta_ = metrics->FullGcFreedBytesDelta();
601   gc_duration_ = metrics->FullGcDuration();
602   gc_duration_delta_ = metrics->FullGcDurationDelta();
603   are_metrics_initialized_ = true;
604 }
605 
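// Registers a linear-alloc region with the collector: optionally remaps it as
// MAP_SHARED (when minor-fault mode would be used), and allocates the shadow map
// and per-page status map that compaction of the region relies on.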
606 void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
607   DCHECK_ALIGNED_PARAM(begin, gPageSize);
608   DCHECK_ALIGNED_PARAM(len, gPageSize);
609   DCHECK_GE(len, Heap::GetPMDSize());
610   size_t alignment = Heap::BestPageTableAlignment(len);
611   bool is_shared = false;
612   // We use MAP_SHARED on non-zygote processes for leveraging userfaultfd's minor-fault feature.
613   if (map_linear_alloc_shared_) {
614     void* ret = mmap(begin,
615                      len,
616                      PROT_READ | PROT_WRITE,
617                      MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
618                      /*fd=*/-1,
619                      /*offset=*/0);
620     CHECK_EQ(ret, begin) << "mmap failed: " << strerror(errno);
621     is_shared = true;
622   }
623   std::string err_msg;
624   MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map",
625                                             len,
626                                             PROT_NONE,
627                                             /*low_4gb=*/false,
628                                             alignment,
629                                             &err_msg));
630   if (!shadow.IsValid()) {
631     LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg;
632     UNREACHABLE();
633   }
634 
635   MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map",
636                                               DivideByPageSize(len),
637                                               PROT_READ | PROT_WRITE,
638                                               /*low_4gb=*/false,
639                                               &err_msg));
640   if (!page_status_map.IsValid()) {
641     LOG(FATAL) << "Failed to allocate linear-alloc page-status shadow map: " << err_msg;
642     UNREACHABLE();
643   }
644   linear_alloc_spaces_data_.emplace_back(std::forward<MemMap>(shadow),
645                                          std::forward<MemMap>(page_status_map),
646                                          begin,
647                                          begin + len,
648                                          is_shared);
649 }
650 
651 void MarkCompact::ClampGrowthLimit(size_t new_capacity) {
652   // From-space is the same size as moving-space in virtual memory.
653   // However, if it's in >4GB address space then we don't need to do it
654   // synchronously.
655 #if defined(__LP64__)
656   constexpr bool kClampFromSpace = kObjPtrPoisoning;
657 #else
658   constexpr bool kClampFromSpace = true;
659 #endif
660   size_t old_capacity = bump_pointer_space_->Capacity();
661   new_capacity = bump_pointer_space_->ClampGrowthLimit(new_capacity);
662   if (new_capacity < old_capacity) {
663     CHECK(from_space_map_.IsValid());
664     if (kClampFromSpace) {
665       from_space_map_.SetSize(new_capacity);
666     }
667     // NOTE: We usually don't use shadow_to_space_map_ and therefore the condition will
668     // mostly be false.
669     if (shadow_to_space_map_.IsValid() && shadow_to_space_map_.Size() > new_capacity) {
670       shadow_to_space_map_.SetSize(new_capacity);
671     }
672     clamp_info_map_status_ = ClampInfoStatus::kClampInfoPending;
673   }
674   CHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
675 }
676 
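// If ClampGrowthLimit() shrank the moving space earlier, shrink the dependent GC
// data structures (from-space map, live-words bitmap, and info map) to match the
// new capacity.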
677 void MarkCompact::MaybeClampGcStructures() {
678   size_t moving_space_size = bump_pointer_space_->Capacity();
679   DCHECK(thread_running_gc_ != nullptr);
680   if (UNLIKELY(clamp_info_map_status_ == ClampInfoStatus::kClampInfoPending)) {
681     CHECK(from_space_map_.IsValid());
682     if (from_space_map_.Size() > moving_space_size) {
683       from_space_map_.SetSize(moving_space_size);
684     }
685     // Bitmaps and other data structures
686     live_words_bitmap_->SetBitmapSize(moving_space_size);
687     size_t set_size = InitializeInfoMap(info_map_.Begin(), moving_space_size);
688     CHECK_LT(set_size, info_map_.Size());
689     info_map_.SetSize(set_size);
690 
691     clamp_info_map_status_ = ClampInfoStatus::kClampInfoFinished;
692   }
693 }
694 
695 void MarkCompact::PrepareCardTableForMarking(bool clear_alloc_space_cards) {
696   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
697   accounting::CardTable* const card_table = heap_->GetCardTable();
698   // immune_spaces_ is emptied in InitializePhase() before marking starts. This
699   // function is invoked twice during marking. We only need to populate immune_spaces_
700   // once per GC cycle. And when it's done (below), all the immune spaces are
701   // added to it. We can never have partially filled immune_spaces_.
702   bool update_immune_spaces = immune_spaces_.IsEmpty();
703   // Mark all of the spaces we never collect as immune.
704   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
705     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
706         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
707       CHECK(space->IsZygoteSpace() || space->IsImageSpace());
708       if (update_immune_spaces) {
709         immune_spaces_.AddSpace(space);
710       }
711       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
712       if (table != nullptr) {
713         table->ProcessCards();
714       } else {
715         // Keep cards aged if we don't have a mod-union table since we need
716         // to scan them in future GCs. This case is for app images.
717         card_table->ModifyCardsAtomic(
718             space->Begin(),
719             space->End(),
720             [](uint8_t card) {
721               return (card == gc::accounting::CardTable::kCardClean)
722                   ? card
723                   : gc::accounting::CardTable::kCardAged;
724             },
725             /* card modified visitor */ VoidFunctor());
726       }
727     } else if (clear_alloc_space_cards) {
728       CHECK(!space->IsZygoteSpace());
729       CHECK(!space->IsImageSpace());
730       // The card-table corresponding to bump-pointer and non-moving space can
731       // be cleared, because we are going to traverse all the reachable objects
732       // in these spaces. This card-table will eventually be used to track
733       // mutations while concurrent marking is going on.
734       card_table->ClearCardRange(space->Begin(), space->Limit());
735       if (space != bump_pointer_space_) {
736         CHECK_EQ(space, heap_->GetNonMovingSpace());
737         non_moving_space_ = space;
738         non_moving_space_bitmap_ = space->GetMarkBitmap();
739       }
740     } else {
741       card_table->ModifyCardsAtomic(
742           space->Begin(),
743           space->End(),
744           [](uint8_t card) {
745             return (card == gc::accounting::CardTable::kCardDirty) ?
746                        gc::accounting::CardTable::kCardAged :
747                        gc::accounting::CardTable::kCardClean;
748           },
749           /* card modified visitor */ VoidFunctor());
750     }
751   }
752 }
753 
754 void MarkCompact::MarkZygoteLargeObjects() {
755   Thread* self = thread_running_gc_;
756   DCHECK_EQ(self, Thread::Current());
757   space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
758   if (los != nullptr) {
759     // Pick the current live bitmap (mark bitmap if swapped).
760     accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
761     accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
762     // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
763     std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
764     live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
765                                   reinterpret_cast<uintptr_t>(range.second),
766                                   [mark_bitmap, los, self](mirror::Object* obj)
767                                       REQUIRES(Locks::heap_bitmap_lock_)
768                                           REQUIRES_SHARED(Locks::mutator_lock_) {
769                                             if (los->IsZygoteLargeObject(self, obj)) {
770                                               mark_bitmap->Set(obj);
771                                             }
772                                           });
773   }
774 }
775 
776 void MarkCompact::InitializePhase() {
777   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
778   mark_stack_ = heap_->GetMarkStack();
779   CHECK(mark_stack_->IsEmpty());
780   immune_spaces_.Reset();
781   moving_first_objs_count_ = 0;
782   non_moving_first_objs_count_ = 0;
783   black_page_count_ = 0;
784   bytes_scanned_ = 0;
785   freed_objects_ = 0;
786   // The first buffer is used by gc-thread.
787   compaction_buffer_counter_.store(1, std::memory_order_relaxed);
788   from_space_slide_diff_ = from_space_begin_ - bump_pointer_space_->Begin();
789   black_allocations_begin_ = bump_pointer_space_->Limit();
790   CHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
791   moving_space_end_ = bump_pointer_space_->Limit();
792   walk_super_class_cache_ = nullptr;
793   // TODO: Would it suffice to read it once in the constructor, which is called
794   // in zygote process?
795   pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
796 }
797 
798 class MarkCompact::ThreadFlipVisitor : public Closure {
799  public:
800   explicit ThreadFlipVisitor(MarkCompact* collector) : collector_(collector) {}
801 
802   void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
803     // Note: self is not necessarily equal to thread since thread may be suspended.
804     Thread* self = Thread::Current();
805     CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
806         << thread->GetState() << " thread " << thread << " self " << self;
807     thread->VisitRoots(collector_, kVisitRootFlagAllRoots);
808     // Interpreter cache is thread-local so it needs to be swept either in a
809     // flip, or a stop-the-world pause.
810     CHECK(collector_->compacting_);
811     thread->SweepInterpreterCache(collector_);
812     thread->AdjustTlab(collector_->black_objs_slide_diff_);
813   }
814 
815  private:
816   MarkCompact* const collector_;
817 };
818 
819 class MarkCompact::FlipCallback : public Closure {
820  public:
821   explicit FlipCallback(MarkCompact* collector) : collector_(collector) {}
822 
823   void Run([[maybe_unused]] Thread* thread) override REQUIRES(Locks::mutator_lock_) {
824     collector_->CompactionPause();
825   }
826 
827  private:
828   MarkCompact* const collector_;
829 };
830 
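// Drives one full GC cycle: concurrent marking, the marking pause, reclamation and
// compaction preparation, the thread-flip/compaction pause, then concurrent
// compaction (when a valid uffd is available) followed by the finish phase.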
831 void MarkCompact::RunPhases() {
832   Thread* self = Thread::Current();
833   thread_running_gc_ = self;
834   Runtime* runtime = Runtime::Current();
835   InitializePhase();
836   GetHeap()->PreGcVerification(this);
837   {
838     ReaderMutexLock mu(self, *Locks::mutator_lock_);
839     MarkingPhase();
840   }
841   {
842     // Marking pause
843     ScopedPause pause(this);
844     MarkingPause();
845     if (kIsDebugBuild) {
846       bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked();
847     }
848   }
849   {
850     ReaderMutexLock mu(self, *Locks::mutator_lock_);
851     ReclaimPhase();
852     PrepareForCompaction();
853   }
854   if (uffd_ != kFallbackMode && !use_uffd_sigbus_) {
855     heap_->GetThreadPool()->WaitForWorkersToBeCreated();
856   }
857 
858   {
859     // Compaction pause
860     ThreadFlipVisitor visitor(this);
861     FlipCallback callback(this);
862     runtime->GetThreadList()->FlipThreadRoots(
863         &visitor, &callback, this, GetHeap()->GetGcPauseListener());
864   }
865 
866   if (IsValidFd(uffd_)) {
867     ReaderMutexLock mu(self, *Locks::mutator_lock_);
868     CompactionPhase();
869   }
870 
871   FinishPhase();
872   thread_running_gc_ = nullptr;
873 }
874 
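// For each to-space page of the moving space, records the pre-compact offset of
// its first live word and the object containing that word, using the chunk-info
// vector and the live-words bitmap.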
875 void MarkCompact::InitMovingSpaceFirstObjects(const size_t vec_len) {
876   // Find the first live word first.
877   size_t to_space_page_idx = 0;
878   uint32_t offset_in_chunk_word;
879   uint32_t offset;
880   mirror::Object* obj;
881   const uintptr_t heap_begin = moving_space_bitmap_->HeapBegin();
882 
883   size_t chunk_idx;
884   // Find the first live word in the space
885   for (chunk_idx = 0; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) {
886     if (chunk_idx >= vec_len) {
887       // We don't have any live data on the moving-space.
888       return;
889     }
890   }
891   DCHECK_LT(chunk_idx, vec_len);
892   // Use live-words bitmap to find the first word
893   offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0);
894   offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
895   DCHECK(live_words_bitmap_->Test(offset)) << "offset=" << offset
896                                            << " chunk_idx=" << chunk_idx
897                                            << " N=0"
898                                            << " offset_in_word=" << offset_in_chunk_word
899                                            << " word=" << std::hex
900                                            << live_words_bitmap_->GetWord(chunk_idx);
901   // The first object doesn't require using FindPrecedingObject().
902   obj = reinterpret_cast<mirror::Object*>(heap_begin + offset * kAlignment);
903   // TODO: add a check to validate the object.
904 
905   pre_compact_offset_moving_space_[to_space_page_idx] = offset;
906   first_objs_moving_space_[to_space_page_idx].Assign(obj);
907   to_space_page_idx++;
908 
909   uint32_t page_live_bytes = 0;
910   while (true) {
911     for (; page_live_bytes <= gPageSize; chunk_idx++) {
912       if (chunk_idx >= vec_len) {
913         moving_first_objs_count_ = to_space_page_idx;
914         return;
915       }
916       page_live_bytes += chunk_info_vec_[chunk_idx];
917     }
918     chunk_idx--;
919     page_live_bytes -= gPageSize;
920     DCHECK_LE(page_live_bytes, kOffsetChunkSize);
921     DCHECK_LE(page_live_bytes, chunk_info_vec_[chunk_idx])
922         << " chunk_idx=" << chunk_idx
923         << " to_space_page_idx=" << to_space_page_idx
924         << " vec_len=" << vec_len;
925     DCHECK(IsAligned<kAlignment>(chunk_info_vec_[chunk_idx] - page_live_bytes));
926     offset_in_chunk_word =
927             live_words_bitmap_->FindNthLiveWordOffset(
928                 chunk_idx, (chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment);
929     offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
930     DCHECK(live_words_bitmap_->Test(offset))
931         << "offset=" << offset
932         << " chunk_idx=" << chunk_idx
933         << " N=" << ((chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment)
934         << " offset_in_word=" << offset_in_chunk_word
935         << " word=" << std::hex << live_words_bitmap_->GetWord(chunk_idx);
936     // TODO: Can we optimize this for large objects? If we are continuing a
937     // large object that spans multiple pages, then we may be able to do without
938     // calling FindPrecedingObject().
939     //
940     // Find the object which encapsulates offset in it, which could be
941     // starting at offset itself.
942     obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
943     // TODO: add a check to validate the object.
944     pre_compact_offset_moving_space_[to_space_page_idx] = offset;
945     first_objs_moving_space_[to_space_page_idx].Assign(obj);
946     to_space_page_idx++;
947     chunk_idx++;
948   }
949 }
950 
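// For each page of the non-moving space, records the object from which
// reference-visiting must start: either the first object beginning on that page,
// or an object that starts on an earlier page and spills into it. Pages left
// empty have no live objects and can be skipped.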
951 void MarkCompact::InitNonMovingSpaceFirstObjects() {
952   accounting::ContinuousSpaceBitmap* bitmap = non_moving_space_->GetLiveBitmap();
953   uintptr_t begin = reinterpret_cast<uintptr_t>(non_moving_space_->Begin());
954   const uintptr_t end = reinterpret_cast<uintptr_t>(non_moving_space_->End());
955   mirror::Object* prev_obj;
956   size_t page_idx;
957   {
958     // Find first live object
959     mirror::Object* obj = nullptr;
960     bitmap->VisitMarkedRange</*kVisitOnce*/ true>(begin,
961                                                   end,
962                                                   [&obj] (mirror::Object* o) {
963                                                     obj = o;
964                                                   });
965     if (obj == nullptr) {
966       // There are no live objects in the non-moving space
967       return;
968     }
969     page_idx = DivideByPageSize(reinterpret_cast<uintptr_t>(obj) - begin);
970     first_objs_non_moving_space_[page_idx++].Assign(obj);
971     prev_obj = obj;
972   }
973   // TODO: check obj is valid
974   uintptr_t prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
975                            + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
976   // For every page find the object starting from which we need to call
977   // VisitReferences. It could either be an object that started on some
978   // preceding page, or some object starting within this page.
979   begin = RoundDown(reinterpret_cast<uintptr_t>(prev_obj) + gPageSize, gPageSize);
980   while (begin < end) {
981     // Utilize, if any, large object that started in some preceding page, but
982     // overlaps with this page as well.
983     if (prev_obj != nullptr && prev_obj_end > begin) {
984       DCHECK_LT(prev_obj, reinterpret_cast<mirror::Object*>(begin));
985       first_objs_non_moving_space_[page_idx].Assign(prev_obj);
986       mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
987       if (HasAddress(klass)) {
988         LOG(WARNING) << "found inter-page object " << prev_obj
989                      << " in non-moving space with klass " << klass
990                      << " in moving space";
991       }
992     } else {
993       prev_obj_end = 0;
994       // It's sufficient to search for the previous object only in the preceding page.
995       // If no live object started in that page and some object had started in
996       // the page preceding that one, which was big enough to overlap with
997       // the current page, then we wouldn't be in this else branch.
998       prev_obj = bitmap->FindPrecedingObject(begin, begin - gPageSize);
999       if (prev_obj != nullptr) {
1000         prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
1001                         + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
1002       }
1003       if (prev_obj_end > begin) {
1004         mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
1005         if (HasAddress(klass)) {
1006           LOG(WARNING) << "found inter-page object " << prev_obj
1007                        << " in non-moving space with klass " << klass
1008                        << " in moving space";
1009         }
1010         first_objs_non_moving_space_[page_idx].Assign(prev_obj);
1011       } else {
1012         // Find the first live object in this page
1013         bitmap->VisitMarkedRange</*kVisitOnce*/ true>(
1014                 begin,
1015                 begin + gPageSize,
1016                 [this, page_idx] (mirror::Object* obj) {
1017                   first_objs_non_moving_space_[page_idx].Assign(obj);
1018                 });
1019       }
1020       // An empty entry indicates that the page has no live objects and hence
1021       // can be skipped.
1022     }
1023     begin += gPageSize;
1024     page_idx++;
1025   }
1026   non_moving_first_objs_count_ = page_idx;
1027 }
1028 
1029 bool MarkCompact::CanCompactMovingSpaceWithMinorFault() {
1030   size_t min_size = (moving_first_objs_count_ + black_page_count_) * gPageSize;
1031   return minor_fault_initialized_ && shadow_to_space_map_.IsValid() &&
1032          shadow_to_space_map_.Size() >= min_size;
1033 }
1034 
1035 class MarkCompact::ConcurrentCompactionGcTask : public SelfDeletingTask {
1036  public:
1037   explicit ConcurrentCompactionGcTask(MarkCompact* collector, size_t idx)
1038       : collector_(collector), index_(idx) {}
1039 
1040   void Run([[maybe_unused]] Thread* self) override REQUIRES_SHARED(Locks::mutator_lock_) {
1041     if (collector_->CanCompactMovingSpaceWithMinorFault()) {
1042       collector_->ConcurrentCompaction<MarkCompact::kMinorFaultMode>(/*buf=*/nullptr);
1043     } else {
1044       // The passed page/buf to ConcurrentCompaction is used by the thread as a
1045       // gPageSize buffer for compacting and updating objects into and then
1046       // passing the buf to uffd ioctls.
1047       uint8_t* buf = collector_->compaction_buffers_map_.Begin() + index_ * gPageSize;
1048       collector_->ConcurrentCompaction<MarkCompact::kCopyMode>(buf);
1049     }
1050   }
1051 
1052  private:
1053   MarkCompact* const collector_;
1054   size_t index_;
1055 };
1056 
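// Prepares compaction metadata after marking: validates and prefix-sums the
// chunk-info vector, computes first-object tables for both spaces, determines
// post_compact_end_ and the black-allocation slide, and sets up userfaultfd (and,
// when SIGBUS mode is off, the worker thread pool) as needed.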
1057 void MarkCompact::PrepareForCompaction() {
1058   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1059   uint8_t* space_begin = bump_pointer_space_->Begin();
1060   size_t vector_len = (black_allocations_begin_ - space_begin) / kOffsetChunkSize;
1061   DCHECK_LE(vector_len, vector_length_);
1062   for (size_t i = 0; i < vector_len; i++) {
1063     DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize);
1064     DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i));
1065   }
1066   InitMovingSpaceFirstObjects(vector_len);
1067   InitNonMovingSpaceFirstObjects();
1068 
1069   // TODO: We can do a lot of neat tricks with this offset vector to tune the
1070   // compaction as we wish. Originally, the compaction algorithm slides all
1071   // live objects towards the beginning of the heap. This is nice because it
1072   // keeps the spatial locality of objects intact.
1073   // However, sometimes it's desired to compact objects in certain portions
1074   // of the heap. For instance, it is expected that, over time,
1075   // objects towards the beginning of the heap are long lived and are always
1076   // densely packed. In this case, it makes sense to only update references in
1077   // there and not try to compact it.
1078   // Furthermore, we might have some large objects and may not want to move such
1079   // objects.
1080   // We can adjust, without too much effort, the values in the chunk_info_vec_ such
1081   // that the objects in the dense beginning area aren't moved. OTOH, large
1082   // objects, which could be anywhere in the heap, could also be kept from
1083   // moving by using a similar trick. The only issue is that by doing this we will
1084   // leave an unused hole in the middle of the heap which can't be used for
1085   // allocations until we do a *full* compaction.
1086   //
1087   // At this point every element in the chunk_info_vec_ contains the live-bytes
1088   // of the corresponding chunk. For old-to-new address computation we need
1089   // every element to reflect total live-bytes till the corresponding chunk.
1090 
1091   // Live-bytes count is required to compute post_compact_end_ below.
1092   uint32_t total;
1093   // Update the vector one past the heap usage as it is required for black
1094   // allocated objects' post-compact address computation.
1095   if (vector_len < vector_length_) {
1096     vector_len++;
1097     total = 0;
1098   } else {
1099     // Fetch the value stored in the last element before it gets overwritten by
1100     // std::exclusive_scan().
1101     total = chunk_info_vec_[vector_len - 1];
1102   }
1103   std::exclusive_scan(chunk_info_vec_, chunk_info_vec_ + vector_len, chunk_info_vec_, 0);
1104   total += chunk_info_vec_[vector_len - 1];
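  // Illustration with hypothetical values, for the vector_len == vector_length_
  // case: if chunk_info_vec_ = {8, 4, 6}, then total is saved as 6 above, the
  // exclusive scan rewrites the vector to {0, 8, 12}, and total becomes
  // 6 + 12 = 18, i.e. the total live bytes covered by the scanned chunks.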
1105 
1106   for (size_t i = vector_len; i < vector_length_; i++) {
1107     DCHECK_EQ(chunk_info_vec_[i], 0u);
1108   }
1109   post_compact_end_ = AlignUp(space_begin + total, gPageSize);
1110   CHECK_EQ(post_compact_end_, space_begin + moving_first_objs_count_ * gPageSize);
1111   black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_;
1112   // We shouldn't be consuming more space after compaction than pre-compaction.
1113   CHECK_GE(black_objs_slide_diff_, 0);
1114   // How do we handle compaction of heap portion used for allocations after the
1115   // marking-pause?
1116   // All allocations after the marking-pause are considered black (reachable)
1117   // for this GC cycle. However, they need not be allocated contiguously as
1118   // different mutators use TLABs. So we will compact the heap till the point
1119   // where allocations took place before the marking-pause. And everything after
1120   // that will be slid with TLAB holes, and then TLAB info in TLS will be
1121   // appropriately updated in the pre-compaction pause.
1122   // The chunk-info vector entries for the post marking-pause allocations will be
1123   // also updated in the pre-compaction pause.
1124 
1125   bool is_zygote = Runtime::Current()->IsZygote();
1126   if (!uffd_initialized_ && CreateUserfaultfd(/*post_fork*/false)) {
1127     if (!use_uffd_sigbus_) {
1128       // Register the buffer that we use for terminating concurrent compaction
1129       struct uffdio_register uffd_register;
1130       uffd_register.range.start = reinterpret_cast<uintptr_t>(conc_compaction_termination_page_);
1131       uffd_register.range.len = gPageSize;
1132       uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1133       CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
1134           << "ioctl_userfaultfd: register compaction termination page: " << strerror(errno);
1135     }
1136     if (!uffd_minor_fault_supported_ && shadow_to_space_map_.IsValid()) {
1137       // A valid shadow-map for moving space is only possible if we
1138       // were able to map it in the constructor. That also means that its size
1139       // matches the moving-space.
1140       CHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity());
1141       // Release the shadow map for moving-space if we don't support minor-fault
1142       // as it's not required.
1143       shadow_to_space_map_.Reset();
1144     }
1145   }
1146   // For zygote we create the thread pool each time before starting compaction,
1147   // and get rid of it when finished. This is expected to happen rarely as
1148   // zygote spends most of the time in native fork loop.
1149   if (uffd_ != kFallbackMode) {
1150     if (!use_uffd_sigbus_) {
1151       ThreadPool* pool = heap_->GetThreadPool();
1152       if (UNLIKELY(pool == nullptr)) {
1153         // On devices with 2 cores, GetParallelGCThreadCount() will return 1,
1154         // which is the desired number of workers on such devices.
1155         heap_->CreateThreadPool(std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers));
1156         pool = heap_->GetThreadPool();
1157       }
1158       size_t num_threads = pool->GetThreadCount();
1159       thread_pool_counter_ = num_threads;
1160       for (size_t i = 0; i < num_threads; i++) {
1161         pool->AddTask(thread_running_gc_, new ConcurrentCompactionGcTask(this, i + 1));
1162       }
1163       CHECK_EQ(pool->GetTaskCount(thread_running_gc_), num_threads);
1164     }
1165     /*
1166      * Possible scenarios for mappings:
1167      * A) All zygote GCs (or if minor-fault feature isn't available): uses
1168      * uffd's copy mode
1169      *  1) For moving-space ('to' space is same as the moving-space):
1170      *    a) Private-anonymous mappings for 'to' and 'from' space are created in
1171      *    the constructor.
1172      *    b) In the compaction pause, we mremap(dontunmap) from 'to' space to
1173      *    'from' space. This results in moving all pages to 'from' space and
1174      *    emptying the 'to' space, thereby preparing it for userfaultfd
1175      *    registration.
1176      *
1177      *  2) For linear-alloc space:
1178      *    a) Private-anonymous mappings for the linear-alloc and its 'shadow'
1179      *    are created by the arena-pool.
1180      *    b) In the compaction pause, we mremap(dontunmap) with similar effect as
1181      *    (A.1.b) above.
1182      *
1183      * B) First GC after zygote: uses uffd's copy-mode
1184      *  1) For moving-space:
1185      *    a) If the mmap for shadow-map has been successful in the constructor,
1186      *    then we remap it (mmap with MAP_FIXED) to get a shared-anonymous
1187      *    mapping.
1188      *    b) Else, we create two memfd and ftruncate them to the moving-space
1189      *    size.
1190      *    c) Same as (A.1.b)
1191      *    d) If (B.1.a), then mremap(dontunmap) from shadow-map to
1192      *    'to' space. This will make both of them map to the same pages
1193      *    e) If (B.1.b), then mmap with the first memfd in shared mode on the
1194      *    'to' space.
1195      *    f) At the end of compaction, we will have moved the moving-space
1196      *    objects to a MAP_SHARED mapping, readying it for minor-fault from next
1197      *    GC cycle.
1198      *
1199      *  2) For linear-alloc space:
1200      *    a) Same as (A.2.b)
1201      *    b) mmap a shared-anonymous mapping onto the linear-alloc space.
1202      *    c) Same as (B.1.f)
1203      *
1204      * C) All subsequent GCs: preferably minor-fault mode, but may also require
1205      * using copy-mode.
1206      *  1) For moving-space:
1207      *    a) If the shadow-map is created and no memfd was used, then that means
1208      *    we are using shared-anonymous. Therefore, mmap a shared-anonymous
1209      *    mapping on the shadow-space.
1210      *    b) If the shadow-map is not mapped yet, then mmap one with a size
1211      *    big enough to hold the compacted moving space. This may fail, in which
1212      *    case we will use uffd's copy-mode.
1213      *    c) If (b) is successful, then mmap the free memfd onto shadow-map.
1214      *    d) Same as (A.1.b)
1215      *    e) In compaction pause, if the shadow-map was not created, then use
1216      *    copy-mode.
1217      *    f) Else, if the created map is smaller than the required-size, then
1218      *    use mremap (without dontunmap) to expand the size. If failed, then use
1219      *    copy-mode.
1220      *    g) Otherwise, same as (B.1.d) and use minor-fault mode.
1221      *
1222      *  2) For linear-alloc space:
1223      *    a) Same as (A.2.b)
1224      *    b) Use minor-fault mode
1225      */
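    // Editor's note: an illustrative sketch (not part of the collector) of the
    // mremap(dontunmap) step referenced in (A.1.b) above; 'to_space',
    // 'from_space' and 'size' are hypothetical locals:
    //
    //   // Move the physical pages backing 'to_space' over to 'from_space',
    //   // leaving 'to_space' mapped but empty and hence ready for userfaultfd
    //   // registration.
    //   void* ret = mremap(to_space, size, size,
    //                      MREMAP_MAYMOVE | MREMAP_DONTUNMAP | MREMAP_FIXED,
    //                      from_space);
    //   CHECK_EQ(ret, static_cast<void*>(from_space)) << strerror(errno);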
1226     auto mmap_shadow_map = [this](int flags, int fd) {
1227       void* ret = mmap(shadow_to_space_map_.Begin(),
1228                        shadow_to_space_map_.Size(),
1229                        PROT_READ | PROT_WRITE,
1230                        flags,
1231                        fd,
1232                        /*offset=*/0);
1233       DCHECK_NE(ret, MAP_FAILED) << "mmap for moving-space shadow failed:" << strerror(errno);
1234     };
1235     // Setup all the virtual memory ranges required for concurrent compaction.
1236     if (minor_fault_initialized_) {
1237       DCHECK(!is_zygote);
1238       if (UNLIKELY(!shadow_to_space_map_.IsValid())) {
1239         // This case happens only once on the first GC in minor-fault mode, if
1240         // we were unable to reserve shadow-map for moving-space in the
1241         // beginning.
1242         DCHECK_GE(moving_to_space_fd_, 0);
1243         // Take an extra 4MB to reduce the likelihood of having to resize this
1244         // map in the pause due to black allocations.
1245         size_t reqd_size = std::min(moving_first_objs_count_ * gPageSize + 4 * MB,
1246                                     bump_pointer_space_->Capacity());
1247         // We cannot support memory-tool with shadow-map (as it requires
1248         // appending a redzone) in this case because the mapping may have to be expanded
1249         // using mremap (in KernelPreparation()), which would ignore the redzone.
1250         // MemMap::MapFile() appends a redzone, but MemMap::MapAnonymous() doesn't.
1251         std::string err_msg;
1252         shadow_to_space_map_ = MemMap::MapAnonymous("moving-space-shadow",
1253                                                     reqd_size,
1254                                                     PROT_NONE,
1255                                                     /*low_4gb=*/kObjPtrPoisoning,
1256                                                     &err_msg);
1257 
1258         if (shadow_to_space_map_.IsValid()) {
1259           CHECK(!kMemoryToolAddsRedzones || shadow_to_space_map_.GetRedzoneSize() == 0u);
1260           // We want to use MemMap to get low-4GB mapping, if required, but then also
1261           // want to have its ownership as we may grow it (in
1262           // KernelPreparation()). If the ownership is not taken and we try to
1263           // resize MemMap, then it unmaps the virtual range.
1264           MemMap temp = shadow_to_space_map_.TakeReservedMemory(shadow_to_space_map_.Size(),
1265                                                                 /*reuse*/ true);
1266           std::swap(temp, shadow_to_space_map_);
1267           DCHECK(!temp.IsValid());
1268         } else {
1269           LOG(WARNING) << "Failed to create moving space's shadow map of " << PrettySize(reqd_size)
1270                        << " size. " << err_msg;
1271         }
1272       }
1273 
1274       if (LIKELY(shadow_to_space_map_.IsValid())) {
1275         int fd = moving_to_space_fd_;
1276         int mmap_flags = MAP_SHARED | MAP_FIXED;
1277         if (fd == kFdUnused) {
1278           // Unused moving-to-space fd means we are using anonymous shared
1279           // mapping.
1280           DCHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity());
1281           mmap_flags |= MAP_ANONYMOUS;
1282           fd = -1;
1283         }
1284         // If the map is smaller than required, then we'll do mremap in the
1285         // compaction pause to increase the size.
1286         mmap_shadow_map(mmap_flags, fd);
1287       }
1288 
1289       for (auto& data : linear_alloc_spaces_data_) {
1290         DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_READ | PROT_WRITE), 0)
1291             << "mprotect failed: " << strerror(errno);
1292       }
1293     } else if (!is_zygote && uffd_minor_fault_supported_) {
1294       // First GC after zygote-fork. We will still use uffd's copy mode but will
1295       // use it to move objects to MAP_SHARED (to prepare for subsequent GCs, which
1296       // will use uffd's minor-fault feature).
1297       if (shadow_to_space_map_.IsValid() &&
1298           shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) {
1299         mmap_shadow_map(MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS, /*fd=*/-1);
1300       } else {
1301         size_t size = bump_pointer_space_->Capacity();
1302         DCHECK_EQ(moving_to_space_fd_, kFdUnused);
1303         DCHECK_EQ(moving_from_space_fd_, kFdUnused);
1304         const char* name = bump_pointer_space_->GetName();
1305         moving_to_space_fd_ = memfd_create(name, MFD_CLOEXEC);
1306         CHECK_NE(moving_to_space_fd_, -1)
1307             << "memfd_create: failed for " << name << ": " << strerror(errno);
1308         moving_from_space_fd_ = memfd_create(name, MFD_CLOEXEC);
1309         CHECK_NE(moving_from_space_fd_, -1)
1310             << "memfd_create: failed for " << name << ": " << strerror(errno);
1311 
1312         // memfds are considered files from the resource-limits point of view,
1313         // and the moving space could be several hundred MBs. So increase the
1314         // limit if it's lower than the moving-space size.
1315         bool rlimit_changed = false;
1316         rlimit rlim_read;
1317         CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlim_read), 0) << "getrlimit failed: " << strerror(errno);
1318         if (rlim_read.rlim_cur < size) {
1319           rlimit_changed = true;
1320           rlimit rlim = rlim_read;
1321           rlim.rlim_cur = size;
1322           CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim), 0) << "setrlimit failed: " << strerror(errno);
1323         }
1324 
1325         // moving-space will map this fd so that we compact objects into it.
1326         int ret = ftruncate(moving_to_space_fd_, size);
1327         CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno);
1328         ret = ftruncate(moving_from_space_fd_, size);
1329         CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno);
1330 
1331         if (rlimit_changed) {
1332           // Reset the rlimit to the original limits.
1333           CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim_read), 0)
1334               << "setrlimit failed: " << strerror(errno);
1335         }
1336       }
1337     }
1338   }
1339 }
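
// Editor's sketch (illustrative only, never called by the collector): what the
// std::exclusive_scan() at the top of the function above computes. Per-chunk
// live-byte counts become each chunk's destination offset in the compacted
// space, and the total live bytes equal the last offset plus the last
// (pre-scan) count. The values below are hypothetical.
[[maybe_unused]] static void ChunkOffsetsSketch() {
  std::vector<uint32_t> chunk_live_bytes = {96, 0, 160, 32};
  uint32_t last_count = chunk_live_bytes.back();  // Fetch before it gets overwritten.
  std::exclusive_scan(
      chunk_live_bytes.begin(), chunk_live_bytes.end(), chunk_live_bytes.begin(), 0u);
  // chunk_live_bytes is now {0, 96, 96, 256}: chunk i's first live byte is
  // copied to space_begin + chunk_live_bytes[i].
  uint32_t total_live_bytes = chunk_live_bytes.back() + last_count;
  CHECK_EQ(total_live_bytes, 288u);
}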
1340 
1341 class MarkCompact::VerifyRootMarkedVisitor : public SingleRootVisitor {
1342  public:
1343   explicit VerifyRootMarkedVisitor(MarkCompact* collector) : collector_(collector) { }
1344 
1345   void VisitRoot(mirror::Object* root, const RootInfo& info) override
1346       REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
1347     CHECK(collector_->IsMarked(root) != nullptr) << info.ToString();
1348   }
1349 
1350  private:
1351   MarkCompact* const collector_;
1352 };
1353 
1354 void MarkCompact::ReMarkRoots(Runtime* runtime) {
1355   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1356   DCHECK_EQ(thread_running_gc_, Thread::Current());
1357   Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1358   MarkNonThreadRoots(runtime);
1359   MarkConcurrentRoots(static_cast<VisitRootFlags>(kVisitRootFlagNewRoots
1360                                                   | kVisitRootFlagStopLoggingNewRoots
1361                                                   | kVisitRootFlagClearRootLog),
1362                       runtime);
1363 
1364   if (kVerifyRootsMarked) {
1365     TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
1366     VerifyRootMarkedVisitor visitor(this);
1367     runtime->VisitRoots(&visitor);
1368   }
1369 }
1370 
1371 void MarkCompact::MarkingPause() {
1372   TimingLogger::ScopedTiming t("(Paused)MarkingPause", GetTimings());
1373   Runtime* runtime = Runtime::Current();
1374   Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1375   {
1376     // Handle the dirty objects as we are a concurrent GC
1377     WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1378     {
1379       MutexLock mu2(thread_running_gc_, *Locks::runtime_shutdown_lock_);
1380       MutexLock mu3(thread_running_gc_, *Locks::thread_list_lock_);
1381       std::list<Thread*> thread_list = runtime->GetThreadList()->GetList();
1382       for (Thread* thread : thread_list) {
1383         thread->VisitRoots(this, static_cast<VisitRootFlags>(0));
1384         DCHECK_EQ(thread->GetThreadLocalGcBuffer(), nullptr);
1385         // Need to revoke all the thread-local allocation stacks since we will
1386         // swap the allocation stacks (below) and don't want anybody to allocate
1387         // into the live stack.
1388         thread->RevokeThreadLocalAllocationStack();
1389         bump_pointer_space_->RevokeThreadLocalBuffers(thread);
1390       }
1391     }
1392     // Fetch only the accumulated objects-allocated count as it is guaranteed to
1393     // be up-to-date after the TLAB revocation above.
1394     freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated();
1395     // Capture 'end' of moving-space at this point. Every allocation beyond this
1396     // point will be considered as black.
1397     // Align-up to page boundary so that black allocations happen from next page
1398     // onwards. Also, it ensures that 'end' is aligned for card-table's
1399     // ClearCardRange().
1400     black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, gPageSize, heap_);
1401     DCHECK_ALIGNED_PARAM(black_allocations_begin_, gPageSize);
1402 
1403     // Re-mark root set. Doesn't include thread-roots as they are already marked
1404     // above.
1405     ReMarkRoots(runtime);
1406     // Scan dirty objects.
1407     RecursiveMarkDirtyObjects(/*paused*/ true, accounting::CardTable::kCardDirty);
1408     {
1409       TimingLogger::ScopedTiming t2("SwapStacks", GetTimings());
1410       heap_->SwapStacks();
1411       live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
1412     }
1413   }
1414   // TODO: For PreSweepingGcVerification(), find correct strategy to visit/walk
1415   // objects in bump-pointer space when we have a mark-bitmap to indicate live
1416   // objects. At the same time we also need to be able to visit black allocations,
1417   // even though they are not marked in the bitmap. Without both of these we fail
1418   // pre-sweeping verification. We also leave windows open wherein a
1419   // VisitObjects/Walk on the space would either miss some objects or visit
1420   // unreachable ones. These windows occur while we are switching from the shared
1421   // mutator-lock to exclusive and vice versa, from here until the compaction pause.
1422   // heap_->PreSweepingGcVerification(this);
1423 
1424   // Disallow new system weaks to prevent a race which occurs when someone adds
1425   // a new system weak before we sweep them. Since this new system weak may not
1426   // be marked, the GC may incorrectly sweep it. This also fixes a race where
1427   // interning may attempt to return a strong reference to a string that is
1428   // about to be swept.
1429   runtime->DisallowNewSystemWeaks();
1430   // Enable the reference-processing slow path. This needs to be done with
1431   // mutators paused since there is no lock in the GetReferent fast path.
1432   heap_->GetReferenceProcessor()->EnableSlowPath();
1433 }
1434 
1435 void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) {
1436   TimingLogger::ScopedTiming t(paused ? "(Paused)SweepSystemWeaks" : "SweepSystemWeaks",
1437                                GetTimings());
1438   ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
1439   runtime->SweepSystemWeaks(this);
1440 }
1441 
1442 void MarkCompact::ProcessReferences(Thread* self) {
1443   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
1444   GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings());
1445 }
1446 
1447 void MarkCompact::Sweep(bool swap_bitmaps) {
1448   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1449   // Ensure that nobody inserted objects in the live stack after we swapped the
1450   // stacks.
1451   CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
1452   {
1453     TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings());
1454     // Mark everything allocated since the last GC as live so that we can sweep
1455     // concurrently, knowing that new allocations won't be marked as live.
1456     accounting::ObjectStack* live_stack = heap_->GetLiveStack();
1457     heap_->MarkAllocStackAsLive(live_stack);
1458     live_stack->Reset();
1459     DCHECK(mark_stack_->IsEmpty());
1460   }
1461   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
1462     if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_ &&
1463         !immune_spaces_.ContainsSpace(space)) {
1464       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
1465       DCHECK(!alloc_space->IsZygoteSpace());
1466       TimingLogger::ScopedTiming split("SweepMallocSpace", GetTimings());
1467       RecordFree(alloc_space->Sweep(swap_bitmaps));
1468     }
1469   }
1470   SweepLargeObjects(swap_bitmaps);
1471 }
1472 
1473 void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
1474   space::LargeObjectSpace* los = heap_->GetLargeObjectsSpace();
1475   if (los != nullptr) {
1476     TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
1477     RecordFreeLOS(los->Sweep(swap_bitmaps));
1478   }
1479 }
1480 
1481 void MarkCompact::ReclaimPhase() {
1482   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1483   DCHECK(thread_running_gc_ == Thread::Current());
1484   Runtime* const runtime = Runtime::Current();
1485   // Process the references concurrently.
1486   ProcessReferences(thread_running_gc_);
1487   // TODO: Try to merge this system-weak sweeping with the one while updating
1488   // references during the compaction pause.
1489   SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ false);
1490   runtime->AllowNewSystemWeaks();
1491   // Clean up class loaders after system weaks are swept since that is how we know if class
1492   // unloading occurred.
1493   runtime->GetClassLinker()->CleanupClassLoaders();
1494   {
1495     WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1496     // Reclaim unmarked objects.
1497     Sweep(false);
1498     // Swap the live and mark bitmaps for each space which we modified. This is an
1499     // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
1500     // bitmaps.
1501     SwapBitmaps();
1502     // Unbind the live and mark bitmaps.
1503     GetHeap()->UnBindBitmaps();
1504   }
1505 }
1506 
1507 // We want to avoid checking, for every reference, whether it lies within the
1508 // page or not. This can be done if we know where in the page the holder object
1509 // lies. If it doesn't overlap either boundary, then we can skip the checks.
1510 template <bool kCheckBegin, bool kCheckEnd>
1511 class MarkCompact::RefsUpdateVisitor {
1512  public:
1513   explicit RefsUpdateVisitor(MarkCompact* collector,
1514                              mirror::Object* obj,
1515                              uint8_t* begin,
1516                              uint8_t* end)
1517       : collector_(collector),
1518         moving_space_begin_(collector->moving_space_begin_),
1519         moving_space_end_(collector->moving_space_end_),
1520         obj_(obj),
1521         begin_(begin),
1522         end_(end) {
1523     DCHECK(!kCheckBegin || begin != nullptr);
1524     DCHECK(!kCheckEnd || end != nullptr);
1525   }
1526 
1527   void operator()([[maybe_unused]] mirror::Object* old,
1528                   MemberOffset offset,
1529                   [[maybe_unused]] bool is_static) const ALWAYS_INLINE
1530       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1531     bool update = true;
1532     if (kCheckBegin || kCheckEnd) {
1533       uint8_t* ref = reinterpret_cast<uint8_t*>(obj_) + offset.Int32Value();
1534       update = (!kCheckBegin || ref >= begin_) && (!kCheckEnd || ref < end_);
1535     }
1536     if (update) {
1537       collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1538     }
1539   }
1540 
1541   // For object arrays we don't need to check boundaries here as it's done in
1542   // VisitReferences().
1543   // TODO: Optimize reference updating using SIMD instructions. Object arrays
1544   // are perfect as all references are tightly packed.
1545   void operator()([[maybe_unused]] mirror::Object* old,
1546                   MemberOffset offset,
1547                   [[maybe_unused]] bool is_static,
1548                   [[maybe_unused]] bool is_obj_array) const ALWAYS_INLINE
1549       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1550     collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1551   }
1552 
1553   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
1554       ALWAYS_INLINE
1555       REQUIRES_SHARED(Locks::mutator_lock_) {
1556     if (!root->IsNull()) {
1557       VisitRoot(root);
1558     }
1559   }
1560 
1561   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
1562       ALWAYS_INLINE
1563       REQUIRES_SHARED(Locks::mutator_lock_) {
1564     collector_->UpdateRoot(root, moving_space_begin_, moving_space_end_);
1565   }
1566 
1567  private:
1568   MarkCompact* const collector_;
1569   uint8_t* const moving_space_begin_;
1570   uint8_t* const moving_space_end_;
1571   mirror::Object* const obj_;
1572   uint8_t* const begin_;
1573   uint8_t* const end_;
1574 };
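
// Editor's sketch (illustrative only, not used by the collector): the
// compile-time boundary-check elision that RefsUpdateVisitor relies on, shown
// in isolation. When the holder object lies entirely within the page, the
// <false, false> instantiation folds the whole check away.
template <bool kCheckBegin, bool kCheckEnd>
static bool RefWithinPageSketch(const uint8_t* ref,
                                const uint8_t* page_begin,
                                const uint8_t* page_end) {
  // Each disabled check becomes a compile-time constant 'true'.
  return (!kCheckBegin || ref >= page_begin) && (!kCheckEnd || ref < page_end);
}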
1575 
1576 bool MarkCompact::IsValidObject(mirror::Object* obj) const {
1577   mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
1578   if (!heap_->GetVerification()->IsValidHeapObjectAddress(klass)) {
1579     return false;
1580   }
1581   return heap_->GetVerification()->IsValidClassUnchecked<kWithFromSpaceBarrier>(
1582           obj->GetClass<kVerifyNone, kWithFromSpaceBarrier>());
1583 }
1584 
1585 template <typename Callback>
1586 void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const {
1587   if (kIsDebugBuild) {
1588     mirror::Class* klass = ref->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1589     mirror::Class* pre_compact_klass = ref->GetClass<kVerifyNone, kWithoutReadBarrier>();
1590     mirror::Class* klass_klass = klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1591     mirror::Class* klass_klass_klass = klass_klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1592     if (HasAddress(pre_compact_klass) &&
1593         reinterpret_cast<uint8_t*>(pre_compact_klass) < black_allocations_begin_) {
1594       CHECK(moving_space_bitmap_->Test(pre_compact_klass))
1595           << "ref=" << ref
1596           << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1597           << " pre_compact_klass=" << pre_compact_klass
1598           << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1599       CHECK(live_words_bitmap_->Test(pre_compact_klass));
1600     }
1601     if (!IsValidObject(ref)) {
1602       std::ostringstream oss;
1603       oss << "Invalid object: "
1604           << "ref=" << ref
1605           << " klass=" << klass
1606           << " klass_klass=" << klass_klass
1607           << " klass_klass_klass=" << klass_klass_klass
1608           << " pre_compact_klass=" << pre_compact_klass
1609           << " from_space_begin=" << static_cast<void*>(from_space_begin_)
1610           << " pre_compact_begin=" << static_cast<void*>(bump_pointer_space_->Begin())
1611           << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1612           << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1613 
1614       // Call callback before dumping larger data like RAM and space dumps.
1615       callback(oss);
1616 
1617       oss << " \nobject="
1618           << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(ref), 128)
1619           << " \nklass(from)="
1620           << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(klass), 128)
1621           << "spaces:\n";
1622       heap_->DumpSpaces(oss);
1623       LOG(FATAL) << oss.str();
1624     }
1625   }
1626 }
1627 
1628 void MarkCompact::CompactPage(mirror::Object* obj,
1629                               uint32_t offset,
1630                               uint8_t* addr,
1631                               bool needs_memset_zero) {
1632   DCHECK(moving_space_bitmap_->Test(obj)
1633          && live_words_bitmap_->Test(obj));
1634   DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj
1635                                            << " offset=" << offset
1636                                            << " addr=" << static_cast<void*>(addr)
1637                                            << " black_allocs_begin="
1638                                            << static_cast<void*>(black_allocations_begin_)
1639                                            << " post_compact_addr="
1640                                            << static_cast<void*>(post_compact_end_);
1641   uint8_t* const start_addr = addr;
1642   // Number of distinct live strides in this page.
1643   size_t stride_count = 0;
1644   uint8_t* last_stride = addr;
1645   uint32_t last_stride_begin = 0;
1646   auto verify_obj_callback = [&] (std::ostream& os) {
1647                                os << " stride_count=" << stride_count
1648                                   << " last_stride=" << static_cast<void*>(last_stride)
1649                                   << " offset=" << offset
1650                                   << " start_addr=" << static_cast<void*>(start_addr);
1651                              };
1652   obj = GetFromSpaceAddr(obj);
1653   live_words_bitmap_->VisitLiveStrides(
1654       offset,
1655       black_allocations_begin_,
1656       gPageSize,
1657       [&addr, &last_stride, &stride_count, &last_stride_begin, verify_obj_callback, this](
1658           uint32_t stride_begin, size_t stride_size, [[maybe_unused]] bool is_last)
1659           REQUIRES_SHARED(Locks::mutator_lock_) {
1660             const size_t stride_in_bytes = stride_size * kAlignment;
1661             DCHECK_LE(stride_in_bytes, gPageSize);
1662             last_stride_begin = stride_begin;
1663             DCHECK(IsAligned<kAlignment>(addr));
1664             memcpy(addr, from_space_begin_ + stride_begin * kAlignment, stride_in_bytes);
1665             if (kIsDebugBuild) {
1666               uint8_t* space_begin = bump_pointer_space_->Begin();
1667               // We can interpret the first word of the stride as an
1668               // obj only from the second stride onwards, as the first
1669               // stride's first object may have started on a previous
1670               // page. The only exception is the first page of the
1671               // moving space.
1672               if (stride_count > 0 || stride_begin * kAlignment < gPageSize) {
1673                 mirror::Object* o =
1674                     reinterpret_cast<mirror::Object*>(space_begin + stride_begin * kAlignment);
1675                 CHECK(live_words_bitmap_->Test(o)) << "ref=" << o;
1676                 CHECK(moving_space_bitmap_->Test(o))
1677                     << "ref=" << o << " bitmap: " << moving_space_bitmap_->DumpMemAround(o);
1678                 VerifyObject(reinterpret_cast<mirror::Object*>(addr), verify_obj_callback);
1679               }
1680             }
1681             last_stride = addr;
1682             addr += stride_in_bytes;
1683             stride_count++;
1684           });
1685   DCHECK_LT(last_stride, start_addr + gPageSize);
1686   DCHECK_GT(stride_count, 0u);
1687   size_t obj_size = 0;
1688   uint32_t offset_within_obj = offset * kAlignment
1689                                - (reinterpret_cast<uint8_t*>(obj) - from_space_begin_);
1690   // First object
1691   if (offset_within_obj > 0) {
1692     mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj);
1693     if (stride_count > 1) {
1694       RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1695                                                                          to_ref,
1696                                                                          start_addr,
1697                                                                          nullptr);
1698       obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1699               visitor, MemberOffset(offset_within_obj), MemberOffset(-1));
1700     } else {
1701       RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1702                                                                         to_ref,
1703                                                                         start_addr,
1704                                                                         start_addr + gPageSize);
1705       obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1706               visitor, MemberOffset(offset_within_obj), MemberOffset(offset_within_obj
1707                                                                      + gPageSize));
1708     }
1709     obj_size = RoundUp(obj_size, kAlignment);
1710     DCHECK_GT(obj_size, offset_within_obj)
1711         << "obj:" << obj
1712         << " class:"
1713         << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1714         << " to_addr:" << to_ref
1715         << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1716         << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1717         << " offset:" << offset * kAlignment
1718         << " class-after-obj-iter:"
1719         << (class_after_obj_iter_ != class_after_obj_ordered_map_.rend() ?
1720             class_after_obj_iter_->first.AsMirrorPtr() : nullptr)
1721         << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1722         << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1723         << " offset-of-last-idx:"
1724         << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1725         << " first-obj-of-last-idx:"
1726         << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1727 
1728     obj_size -= offset_within_obj;
1729     // If there is only one stride, then adjust last_stride_begin to the
1730     // end of the first object.
1731     if (stride_count == 1) {
1732       last_stride_begin += obj_size / kAlignment;
1733     }
1734   }
1735 
1736   // Except for the last page being compacted, the pages will have addr ==
1737   // start_addr + gPageSize.
1738   uint8_t* const end_addr = addr;
1739   addr = start_addr;
1740   size_t bytes_done = obj_size;
1741   // All strides except the last one can be updated without any boundary
1742   // checks.
1743   DCHECK_LE(addr, last_stride);
1744   size_t bytes_to_visit = last_stride - addr;
1745   DCHECK_LE(bytes_to_visit, gPageSize);
1746   while (bytes_to_visit > bytes_done) {
1747     mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1748     VerifyObject(ref, verify_obj_callback);
1749     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1750             visitor(this, ref, nullptr, nullptr);
1751     obj_size = ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1752     obj_size = RoundUp(obj_size, kAlignment);
1753     bytes_done += obj_size;
1754   }
1755   // The last stride may have multiple objects in it, and we don't know where
1756   // the last object which crosses the page boundary starts; therefore check
1757   // page-end in all of these objects. Also, we need to call
1758   // VisitRefsForCompaction() with the from-space object as we fetch the object
1759   // size, which in the case of a klass requires 'class_size_'.
1760   uint8_t* from_addr = from_space_begin_ + last_stride_begin * kAlignment;
1761   bytes_to_visit = end_addr - addr;
1762   DCHECK_LE(bytes_to_visit, gPageSize);
1763   while (bytes_to_visit > bytes_done) {
1764     mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1765     obj = reinterpret_cast<mirror::Object*>(from_addr);
1766     VerifyObject(ref, verify_obj_callback);
1767     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
1768             visitor(this, ref, nullptr, start_addr + gPageSize);
1769     obj_size = obj->VisitRefsForCompaction(visitor,
1770                                            MemberOffset(0),
1771                                            MemberOffset(end_addr - (addr + bytes_done)));
1772     obj_size = RoundUp(obj_size, kAlignment);
1773     DCHECK_GT(obj_size, 0u)
1774         << "from_addr:" << obj
1775         << " from-space-class:"
1776         << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1777         << " to_addr:" << ref
1778         << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1779         << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1780         << " offset:" << offset * kAlignment
1781         << " bytes_done:" << bytes_done
1782         << " class-after-obj-iter:"
1783         << (class_after_obj_iter_ != class_after_obj_ordered_map_.rend() ?
1784             class_after_obj_iter_->first.AsMirrorPtr() : nullptr)
1785         << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1786         << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1787         << " offset-of-last-idx:"
1788         << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1789         << " first-obj-of-last-idx:"
1790         << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1791 
1792     from_addr += obj_size;
1793     bytes_done += obj_size;
1794   }
1795   // The last page that we compact may have some bytes left untouched at the
1796   // end; we should zero them, as the kernel copies at page granularity.
1797   if (needs_memset_zero && UNLIKELY(bytes_done < gPageSize)) {
1798     std::memset(addr + bytes_done, 0x0, gPageSize - bytes_done);
1799   }
1800 }
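
// Editor's sketch (illustrative only, not used by the collector): enumerating
// "live strides" -- maximal runs of consecutive set bits -- from a toy bitmap,
// which is conceptually what live_words_bitmap_->VisitLiveStrides() feeds to
// the copying lambda in CompactPage() above. Each bit stands for kAlignment
// bytes of live data.
template <typename Visitor>
static void VisitLiveStridesSketch(const std::vector<bool>& live_bits, Visitor&& visit) {
  size_t i = 0;
  while (i < live_bits.size()) {
    if (!live_bits[i]) {
      i++;
      continue;
    }
    size_t stride_begin = i;
    while (i < live_bits.size() && live_bits[i]) {
      i++;
    }
    // Words [stride_begin, i) are live and get copied as one contiguous chunk.
    visit(stride_begin, /*stride_size=*/i - stride_begin);
  }
}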
1801 
1802 // We store the starting point (pre_compact_page - first_obj) and first-chunk's
1803 // size. If more TLAB(s) started in this page, then those chunks are identified
1804 // using mark bitmap. All this info is prepared in UpdateMovingSpaceBlackAllocations().
1805 // If we find a set bit in the bitmap, then we copy the remaining page and then
1806 // use the bitmap to visit each object for updating references.
1807 void MarkCompact::SlideBlackPage(mirror::Object* first_obj,
1808                                  mirror::Object* next_page_first_obj,
1809                                  uint32_t first_chunk_size,
1810                                  uint8_t* const pre_compact_page,
1811                                  uint8_t* dest,
1812                                  bool needs_memset_zero) {
1813   DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
1814   size_t bytes_copied;
1815   uint8_t* src_addr = reinterpret_cast<uint8_t*>(GetFromSpaceAddr(first_obj));
1816   uint8_t* pre_compact_addr = reinterpret_cast<uint8_t*>(first_obj);
1817   uint8_t* const pre_compact_page_end = pre_compact_page + gPageSize;
1818   uint8_t* const dest_page_end = dest + gPageSize;
1819 
1820   auto verify_obj_callback = [&] (std::ostream& os) {
1821                                os << " first_obj=" << first_obj
1822                                   << " next_page_first_obj=" << next_page_first_obj
1823                                   << " first_chunk_size=" << first_chunk_size
1824                                   << " dest=" << static_cast<void*>(dest)
1825                                   << " pre_compact_page="
1826                                   << static_cast<void* const>(pre_compact_page);
1827                              };
1828   // We have an empty portion at the beginning of the page. Zero it.
1829   if (pre_compact_addr > pre_compact_page) {
1830     bytes_copied = pre_compact_addr - pre_compact_page;
1831     DCHECK_LT(bytes_copied, gPageSize);
1832     if (needs_memset_zero) {
1833       std::memset(dest, 0x0, bytes_copied);
1834     }
1835     dest += bytes_copied;
1836   } else {
1837     bytes_copied = 0;
1838     size_t offset = pre_compact_page - pre_compact_addr;
1839     pre_compact_addr = pre_compact_page;
1840     src_addr += offset;
1841     DCHECK(IsAlignedParam(src_addr, gPageSize));
1842   }
1843   // Copy the first chunk of live words
1844   std::memcpy(dest, src_addr, first_chunk_size);
1845   // Update references in the first chunk. Use object size to find next object.
1846   {
1847     size_t bytes_to_visit = first_chunk_size;
1848     size_t obj_size;
1849     // The first object started in some previous page. So we need to check the
1850     // beginning.
1851     DCHECK_LE(reinterpret_cast<uint8_t*>(first_obj), pre_compact_addr);
1852     size_t offset = pre_compact_addr - reinterpret_cast<uint8_t*>(first_obj);
1853     if (bytes_copied == 0 && offset > 0) {
1854       mirror::Object* to_obj = reinterpret_cast<mirror::Object*>(dest - offset);
1855       mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(src_addr - offset);
1856       // If the next page's first-obj is in this page or nullptr, then we don't
1857       // need to check end boundary
1858       if (next_page_first_obj == nullptr
1859           || (first_obj != next_page_first_obj
1860               && reinterpret_cast<uint8_t*>(next_page_first_obj) <= pre_compact_page_end)) {
1861         RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1862                                                                            to_obj,
1863                                                                            dest,
1864                                                                            nullptr);
1865         obj_size = from_obj->VisitRefsForCompaction<
1866                 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1867                                                                    MemberOffset(offset),
1868                                                                    MemberOffset(-1));
1869       } else {
1870         RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1871                                                                           to_obj,
1872                                                                           dest,
1873                                                                           dest_page_end);
1874         obj_size = from_obj->VisitRefsForCompaction<
1875                 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1876                                                                    MemberOffset(offset),
1877                                                                    MemberOffset(offset
1878                                                                                 + gPageSize));
1879         if (first_obj == next_page_first_obj) {
1880           // First object is the only object on this page. So there's nothing else left to do.
1881           return;
1882         }
1883       }
1884       obj_size = RoundUp(obj_size, kAlignment);
1885       obj_size -= offset;
1886       dest += obj_size;
1887       bytes_to_visit -= obj_size;
1888     }
1889     bytes_copied += first_chunk_size;
1890     // If the last object in this page is next_page_first_obj, then we need to check end boundary
1891     bool check_last_obj = false;
1892     if (next_page_first_obj != nullptr
1893         && reinterpret_cast<uint8_t*>(next_page_first_obj) < pre_compact_page_end
1894         && bytes_copied == gPageSize) {
1895       size_t diff = pre_compact_page_end - reinterpret_cast<uint8_t*>(next_page_first_obj);
1896       DCHECK_LE(diff, gPageSize);
1897       DCHECK_LE(diff, bytes_to_visit);
1898       bytes_to_visit -= diff;
1899       check_last_obj = true;
1900     }
1901     while (bytes_to_visit > 0) {
1902       mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1903       VerifyObject(dest_obj, verify_obj_callback);
1904       RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(this,
1905                                                                           dest_obj,
1906                                                                           nullptr,
1907                                                                           nullptr);
1908       obj_size = dest_obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1909       obj_size = RoundUp(obj_size, kAlignment);
1910       bytes_to_visit -= obj_size;
1911       dest += obj_size;
1912     }
1913     DCHECK_EQ(bytes_to_visit, 0u);
1914     if (check_last_obj) {
1915       mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1916       VerifyObject(dest_obj, verify_obj_callback);
1917       RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
1918                                                                          dest_obj,
1919                                                                          nullptr,
1920                                                                          dest_page_end);
1921       mirror::Object* obj = GetFromSpaceAddr(next_page_first_obj);
1922       obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1923                                                           MemberOffset(0),
1924                                                           MemberOffset(dest_page_end - dest));
1925       return;
1926     }
1927   }
1928 
1929   // Probably a TLAB finished on this page and/or a new TLAB started as well.
1930   if (bytes_copied < gPageSize) {
1931     src_addr += first_chunk_size;
1932     pre_compact_addr += first_chunk_size;
1933     // Use mark-bitmap to identify where objects are. First call
1934     // VisitMarkedRange for only the first marked bit. If none is found, zero the
1935     // remaining bytes and return. Otherwise, copy the rest of the page and then
1936     // call VisitMarkedRange for all marked bits *after* the one found in this
1937     // invocation, this time to visit references.
1938     uintptr_t start_visit = reinterpret_cast<uintptr_t>(pre_compact_addr);
1939     uintptr_t page_end = reinterpret_cast<uintptr_t>(pre_compact_page_end);
1940     mirror::Object* found_obj = nullptr;
1941     moving_space_bitmap_->VisitMarkedRange</*kVisitOnce*/true>(start_visit,
1942                                                                 page_end,
1943                                                                 [&found_obj](mirror::Object* obj) {
1944                                                                   found_obj = obj;
1945                                                                 });
1946     size_t remaining_bytes = gPageSize - bytes_copied;
1947     if (found_obj == nullptr) {
1948       if (needs_memset_zero) {
1949         // No more black objects in this page. Zero the remaining bytes and return.
1950         std::memset(dest, 0x0, remaining_bytes);
1951       }
1952       return;
1953     }
1954     // Copy everything in this page, which includes any zeroed regions
1955     // in-between.
1956     std::memcpy(dest, src_addr, remaining_bytes);
1957     DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1958     moving_space_bitmap_->VisitMarkedRange(
1959             reinterpret_cast<uintptr_t>(found_obj) + mirror::kObjectHeaderSize,
1960             page_end,
1961             [&found_obj, pre_compact_addr, dest, this, verify_obj_callback] (mirror::Object* obj)
1962             REQUIRES_SHARED(Locks::mutator_lock_) {
1963               ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1964               mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
1965               VerifyObject(ref, verify_obj_callback);
1966               RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1967                       visitor(this, ref, nullptr, nullptr);
1968               ref->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1969                                                                   MemberOffset(0),
1970                                                                   MemberOffset(-1));
1971               // Remember for next round.
1972               found_obj = obj;
1973             });
1974     // found_obj may have been updated in VisitMarkedRange. Visit the last found
1975     // object.
1976     DCHECK_GT(reinterpret_cast<uint8_t*>(found_obj), pre_compact_addr);
1977     DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1978     ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1979     mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest + diff);
1980     VerifyObject(dest_obj, verify_obj_callback);
1981     RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true> visitor(
1982         this, dest_obj, nullptr, dest_page_end);
1983     // Last object could overlap with next page. And if it happens to be a
1984     // class, then we may access something (like static-fields' offsets) which
1985     // is on the next page. Therefore, use from-space's reference.
1986     mirror::Object* obj = GetFromSpaceAddr(found_obj);
1987     obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
1988         visitor, MemberOffset(0), MemberOffset(page_end - reinterpret_cast<uintptr_t>(found_obj)));
1989   }
1990 }
1991 
1992 template <bool kFirstPageMapping>
1993 void MarkCompact::MapProcessedPages(uint8_t* to_space_start,
1994                                     Atomic<PageState>* state_arr,
1995                                     size_t arr_idx,
1996                                     size_t arr_len) {
1997   CHECK(minor_fault_initialized_);
1998   DCHECK_LT(arr_idx, arr_len);
1999   DCHECK_ALIGNED_PARAM(to_space_start, gPageSize);
2000   // Claim all the contiguous pages which are ready to be mapped, and then do
2001   // so in a single ioctl. This helps avoid the overhead of invoking the syscall
2002   // several times and also maps the already-processed pages, avoiding
2003   // unnecessary faults on them.
2004   size_t length = kFirstPageMapping ? gPageSize : 0;
2005   if (kFirstPageMapping) {
2006     arr_idx++;
2007   }
2008   // We need to guarantee that we don't end up successfully marking a later
2009   // page as 'mapping' and then failing to mark an earlier page. To guarantee
2010   // that, we use acq_rel order.
2011   for (; arr_idx < arr_len; arr_idx++, length += gPageSize) {
2012     PageState expected_state = PageState::kProcessed;
2013     if (!state_arr[arr_idx].compare_exchange_strong(
2014             expected_state, PageState::kProcessedAndMapping, std::memory_order_acq_rel)) {
2015       break;
2016     }
2017   }
2018   if (length > 0) {
2019     // Note: We need the first page to be attempted (to be mapped) by the ioctl
2020     // as this function is called due to some mutator thread waiting on the
2021     // 'to_space_start' page. Therefore, the ioctl must always be called
2022     // with 'to_space_start' as the 'start' address because it can bail out in
2023     // the middle (not attempting to map the subsequent pages) if it finds any
2024     // page either already mapped in between, or missing on the shadow-map.
2025     struct uffdio_continue uffd_continue;
2026     uffd_continue.range.start = reinterpret_cast<uintptr_t>(to_space_start);
2027     uffd_continue.range.len = length;
2028     uffd_continue.mode = 0;
2029     int ret = ioctl(uffd_, UFFDIO_CONTINUE, &uffd_continue);
2030     if (UNLIKELY(ret == -1 && errno == EAGAIN)) {
2031       // This can happen only in linear-alloc.
2032       DCHECK(linear_alloc_spaces_data_.end() !=
2033              std::find_if(linear_alloc_spaces_data_.begin(),
2034                           linear_alloc_spaces_data_.end(),
2035                           [to_space_start](const LinearAllocSpaceData& data) {
2036                             return data.begin_ <= to_space_start && to_space_start < data.end_;
2037                           }));
2038 
2039       // This could happen if userfaultfd couldn't find any pages mapped in the
2040       // shadow map. For instance, if there are certain (contiguous) pages on
2041       // linear-alloc which are allocated and have first-object set-up but have
2042       // not been accessed yet.
2043       // Bail out by setting the remaining pages' state back to kProcessed and
2044       // then waking up any waiting threads.
2045       DCHECK_GE(uffd_continue.mapped, 0);
2046       DCHECK_ALIGNED_PARAM(uffd_continue.mapped, gPageSize);
2047       DCHECK_LT(uffd_continue.mapped, static_cast<ssize_t>(length));
2048       if (kFirstPageMapping) {
2049         // In this case the first page must be mapped.
2050         DCHECK_GE(uffd_continue.mapped, static_cast<ssize_t>(gPageSize));
2051       }
2052       // Nobody else would modify these pages' state simultaneously, so an atomic
2053       // store (without CAS) is sufficient. Use 'release' order to ensure that all
2054       // states are modified sequentially.
2055       for (size_t remaining_len = length - uffd_continue.mapped; remaining_len > 0;
2056            remaining_len -= gPageSize) {
2057         arr_idx--;
2058         DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed),
2059                   PageState::kProcessedAndMapping);
2060         state_arr[arr_idx].store(PageState::kProcessed, std::memory_order_release);
2061       }
2062       uffd_continue.range.start =
2063           reinterpret_cast<uintptr_t>(to_space_start) + uffd_continue.mapped;
2064       uffd_continue.range.len = length - uffd_continue.mapped;
2065       ret = ioctl(uffd_, UFFDIO_WAKE, &uffd_continue.range);
2066       CHECK_EQ(ret, 0) << "ioctl_userfaultfd: wake failed: " << strerror(errno);
2067     } else {
2068       // We may receive ENOENT if gc-thread unregisters the
2069       // range behind our back, which is fine because that
2070       // happens only when it knows compaction is done.
2071       CHECK(ret == 0 || !kFirstPageMapping || errno == ENOENT)
2072           << "ioctl_userfaultfd: continue failed: " << strerror(errno);
2073       if (ret == 0) {
2074         DCHECK_EQ(uffd_continue.mapped, static_cast<ssize_t>(length));
2075       }
2076     }
2077     if (use_uffd_sigbus_) {
2078       // Nobody else would modify these pages' state simultaneously so atomic
2079       // store is sufficient.
2080       for (; uffd_continue.mapped > 0; uffd_continue.mapped -= gPageSize) {
2081         arr_idx--;
2082         DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed),
2083                   PageState::kProcessedAndMapping);
2084         state_arr[arr_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
2085       }
2086     }
2087   }
2088 }
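
// Editor's sketch (illustrative only, not called by the collector): the minimal
// UFFDIO_CONTINUE invocation that the batching above builds on. 'uffd' and the
// address range are hypothetical; the range must be registered with the
// userfaultfd in minor-fault mode.
[[maybe_unused]] static int64_t ContinueRangeSketch(int uffd, void* start, size_t len) {
  struct uffdio_continue cont;
  cont.range.start = reinterpret_cast<uintptr_t>(start);
  cont.range.len = len;
  cont.mode = 0;
  (void)ioctl(uffd, UFFDIO_CONTINUE, &cont);
  // On success 'mapped' equals 'len'; otherwise it holds a negated errno or the
  // number of bytes mapped before the ioctl bailed out, which the caller must
  // examine, as MapProcessedPages() does above.
  return cont.mapped;
}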
2089 
2090 template <uint32_t kYieldMax = 5, uint64_t kSleepUs = 10>
2091 static void BackOff(uint32_t i) {
2092   // TODO: Consider adding x86 PAUSE and/or ARM YIELD here.
2093   if (i <= kYieldMax) {
2094     sched_yield();
2095   } else {
2096     // nanosleep is not in the async-signal-safe list, but bionic implements it
2097     // with a pure system call, so it should be fine.
2098     NanoSleep(kSleepUs * 1000 * (i - kYieldMax));
2099   }
2100 }
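
// Editor's sketch (illustrative only, never instantiated): how BackOff() is
// typically used -- retry an operation that can fail transiently, yielding for
// the first few attempts and then sleeping progressively longer. 'TryFn' and
// the retry bound are hypothetical.
template <typename TryFn>
static bool RetryWithBackOffSketch(TryFn&& try_op, uint32_t max_attempts) {
  for (uint32_t i = 0; i < max_attempts; i++) {
    if (try_op()) {
      return true;
    }
    BackOff(i);  // i <= 5: sched_yield(); beyond that: sleep 10us * (i - 5).
  }
  return false;
}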
2101 
2102 size_t MarkCompact::ZeropageIoctl(void* addr,
2103                                   size_t length,
2104                                   bool tolerate_eexist,
2105                                   bool tolerate_enoent) {
2106   int32_t backoff_count = -1;
2107   int32_t max_backoff = 10;  // max native priority.
2108   struct uffdio_zeropage uffd_zeropage;
2109   DCHECK(IsAlignedParam(addr, gPageSize));
2110   uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr);
2111   uffd_zeropage.range.len = length;
2112   uffd_zeropage.mode = gUffdSupportsMmapTrylock ? UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK : 0;
2113   while (true) {
2114     uffd_zeropage.zeropage = 0;
2115     int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage);
2116     if (ret == 0) {
2117       DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(length));
2118       return length;
2119     } else if (errno == EAGAIN) {
2120       if (uffd_zeropage.zeropage > 0) {
2121         // Contention was observed after acquiring mmap_lock. But the first page
2122         // is already done, which is what we care about.
2123         DCHECK(IsAlignedParam(uffd_zeropage.zeropage, gPageSize));
2124         DCHECK_GE(uffd_zeropage.zeropage, static_cast<ssize_t>(gPageSize));
2125         return uffd_zeropage.zeropage;
2126       } else if (uffd_zeropage.zeropage < 0) {
2127         // mmap_read_trylock() failed due to contention. Back-off and retry.
2128         DCHECK_EQ(uffd_zeropage.zeropage, -EAGAIN);
2129         if (backoff_count == -1) {
2130           int prio = Thread::Current()->GetNativePriority();
2131           DCHECK(prio > 0 && prio <= 10) << prio;
2132           max_backoff -= prio;
2133           backoff_count = 0;
2134         }
2135         if (backoff_count < max_backoff) {
2136           // Using 3 to align 'normal' priority threads with sleep.
2137           BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
2138         } else {
2139           uffd_zeropage.mode = 0;
2140         }
2141       }
2142     } else if (tolerate_eexist && errno == EEXIST) {
2143       // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
2144       // wouldn't be included in it.
2145       return uffd_zeropage.zeropage > 0 ? uffd_zeropage.zeropage + gPageSize : gPageSize;
2146     } else {
2147       CHECK(tolerate_enoent && errno == ENOENT)
2148           << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr;
2149       return 0;
2150     }
2151   }
2152 }
2153 
2154 size_t MarkCompact::CopyIoctl(void* dst, void* buffer, size_t length, bool return_on_contention) {
2155   int32_t backoff_count = -1;
2156   int32_t max_backoff = 10;  // max native priority.
2157   struct uffdio_copy uffd_copy;
2158   uffd_copy.mode = gUffdSupportsMmapTrylock ? UFFDIO_COPY_MODE_MMAP_TRYLOCK : 0;
2159   uffd_copy.src = reinterpret_cast<uintptr_t>(buffer);
2160   uffd_copy.dst = reinterpret_cast<uintptr_t>(dst);
2161   uffd_copy.len = length;
2162   uffd_copy.copy = 0;
2163   while (true) {
2164     int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy);
2165     if (ret == 0) {
2166       DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(length));
2167       break;
2168     } else if (errno == EAGAIN) {
2169       // Contention observed.
2170       DCHECK_NE(uffd_copy.copy, 0);
2171       if (uffd_copy.copy > 0) {
2172         // Contention was observed after acquiring mmap_lock.
2173         DCHECK(IsAlignedParam(uffd_copy.copy, gPageSize));
2174         DCHECK_GE(uffd_copy.copy, static_cast<ssize_t>(gPageSize));
2175         break;
2176       } else {
2177         // mmap_read_trylock() failed due to contention.
2178         DCHECK_EQ(uffd_copy.copy, -EAGAIN);
2179         uffd_copy.copy = 0;
2180         if (return_on_contention) {
2181           break;
2182         }
2183       }
2184       if (backoff_count == -1) {
2185         int prio = Thread::Current()->GetNativePriority();
2186         DCHECK(prio > 0 && prio <= 10) << prio;
2187         max_backoff -= prio;
2188         backoff_count = 0;
2189       }
2190       if (backoff_count < max_backoff) {
2191         // Using 3 to align 'normal' priority threads with sleep.
2192         BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
2193       } else {
2194         uffd_copy.mode = 0;
2195       }
2196     } else if (errno == EEXIST) {
2197       DCHECK_NE(uffd_copy.copy, 0);
2198       if (uffd_copy.copy < 0) {
2199         uffd_copy.copy = 0;
2200       }
2201       // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
2202       // wouldn't be included in it.
2203       uffd_copy.copy += gPageSize;
2204       break;
2205     } else {
2206       DCHECK_EQ(uffd_copy.copy, -errno);
2207       LOG(FATAL) << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer
2208                  << " dst:" << dst;
2209     }
2210   }
2211   return uffd_copy.copy;
2212 }
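// A minimal standalone sketch (hypothetical, for illustration only) of the
// plain UFFDIO_COPY usage that CopyIoctl() above wraps. 'uffd', 'src_buf',
// 'dst' and 'len' are assumed to be set up by the caller, and error handling
// is reduced to a single CHECK:
//
//   struct uffdio_copy copy = {};
//   copy.src = reinterpret_cast<uintptr_t>(src_buf);
//   copy.dst = reinterpret_cast<uintptr_t>(dst);
//   copy.len = len;
//   copy.mode = 0;  // No MMAP_TRYLOCK: block on mmap_lock if necessary.
//   if (ioctl(uffd, UFFDIO_COPY, &copy) != 0) {
//     // On partial success copy.copy holds the bytes mapped; on failure it
//     // holds the negated errno.
//     CHECK(errno == EEXIST || errno == EAGAIN) << strerror(errno);
//   }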
2213 
2214 template <int kMode, typename CompactionFn>
DoPageCompactionWithStateChange(size_t page_idx,uint8_t * to_space_page,uint8_t * page,bool map_immediately,CompactionFn func)2215 bool MarkCompact::DoPageCompactionWithStateChange(size_t page_idx,
2216                                                   uint8_t* to_space_page,
2217                                                   uint8_t* page,
2218                                                   bool map_immediately,
2219                                                   CompactionFn func) {
2220   uint32_t expected_state = static_cast<uint8_t>(PageState::kUnprocessed);
2221   uint32_t desired_state = static_cast<uint8_t>(map_immediately ? PageState::kProcessingAndMapping :
2222                                                                   PageState::kProcessing);
2223   // In the concurrent case (kMode != kFallbackMode) we need to ensure that the update
2224   // to moving_pages_status_[page_idx] is released before the contents of the page are
2225   // made accessible to other threads.
2226   //
2227   // We need acquire ordering here to ensure that when the CAS fails, another thread
2228   // has completed processing the page, which is guaranteed by the release below.
2229   if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong(
2230                                     expected_state, desired_state, std::memory_order_acquire)) {
2231     func();
2232     if (kMode == kCopyMode) {
2233       if (map_immediately) {
2234         CopyIoctl(to_space_page, page, gPageSize, /*return_on_contention=*/false);
2235         // Store is sufficient as no other thread could modify the status at this
2236         // point. Relaxed order is sufficient as the ioctl will act as a fence.
2237         moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2238                                              std::memory_order_relaxed);
2239       } else {
2240         // Add the src page's index in the status word.
2241         DCHECK(from_space_map_.HasAddress(page));
2242         DCHECK_LE(static_cast<size_t>(page - from_space_begin_),
2243                   std::numeric_limits<uint32_t>::max());
2244         uint32_t store_val = page - from_space_begin_;
2245         DCHECK_EQ(store_val & kPageStateMask, 0u);
2246         store_val |= static_cast<uint8_t>(PageState::kProcessed);
2247         // Store is sufficient as no other thread would modify the status at this point.
2248         moving_pages_status_[page_idx].store(store_val, std::memory_order_release);
2249       }
2250     } else if (kMode == kMinorFaultMode) {
2251       expected_state = static_cast<uint8_t>(PageState::kProcessing);
2252       desired_state = static_cast<uint8_t>(PageState::kProcessed);
2253       // The CAS needs release ordering to ensure that stores to the
2254       // page make it to memory *before* other threads observe that it's
2255       // ready to be mapped.
2256       if (!moving_pages_status_[page_idx].compare_exchange_strong(
2257               expected_state, desired_state, std::memory_order_release)) {
2258         // Some mutator has requested to map the page after processing it.
2259         DCHECK_EQ(expected_state, static_cast<uint8_t>(PageState::kProcessingAndMapping));
2260       }
2261       UNREACHABLE();
2262     }
2263     return true;
2264   } else {
2265     // Only GC thread could have set the state to Processed.
2266     DCHECK_NE(expected_state, static_cast<uint8_t>(PageState::kProcessed));
2267     return false;
2268   }
2269 }
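// Illustrative example of the status word written in the kCopyMode path above
// when map_immediately is false (numbers are hypothetical): with
// gPageSize = 4096, if the compacted contents were written at from-space
// offset 0x3000, the word stored in moving_pages_status_[page_idx] is
// 0x3000 | static_cast<uint8_t>(PageState::kProcessed), so MapMovingSpacePages()
// can later recover both the state (word & kPageStateMask) and the from-space
// offset (word & ~kPageStateMask) from a single atomic load.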
2270 
FreeFromSpacePages(size_t cur_page_idx,int mode,size_t end_idx_for_mapping)2271 bool MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_idx_for_mapping) {
2272   // Thanks to sliding compaction, bump-pointer allocations, and reverse
2273   // compaction (see CompactMovingSpace), the logic here is fairly simple: find
2274   // the to-space page up to which compaction has finished; all the from-space
2275   // pages corresponding to that page and onwards can be freed. There are some
2276   // corner cases to be taken care of, which are described below.
2277   size_t idx = last_checked_reclaim_page_idx_;
2278   // Find the to-space page up to which the corresponding from-space pages can be
2279   // freed.
2280   for (; idx > cur_page_idx; idx--) {
2281     PageState state = GetMovingPageState(idx - 1);
2282     if (state == PageState::kMutatorProcessing) {
2283       // Some mutator is working on the page.
2284       break;
2285     }
2286     DCHECK(state >= PageState::kProcessed ||
2287            (state == PageState::kUnprocessed &&
2288             (mode == kFallbackMode || idx > moving_first_objs_count_)));
2289   }
2290   DCHECK_LE(idx, last_checked_reclaim_page_idx_);
2291   if (idx == last_checked_reclaim_page_idx_) {
2292     // Nothing to do.
2293     return false;
2294   }
2295 
2296   uint8_t* reclaim_begin;
2297   uint8_t* idx_addr;
2298   // Calculate the first from-space page to be freed using 'idx'. If the
2299   // first-object of the idx'th to-space page started before the corresponding
2300   // from-space page, which is almost always the case in the compaction portion
2301   // of the moving-space, then the subsequent pages that are yet to be compacted
2302   // will still need the from-space pages. Therefore, find the page (among the
2303   // already compacted pages) whose first-object is different from ours. All the
2304   // from-space pages starting from that one are safe to be removed. Note that
2305   // this iteration is not expected to be long in normal cases, as objects are
2306   // smaller than page size.
2307   if (idx >= moving_first_objs_count_) {
2308     // black-allocated portion of the moving-space
2309     idx_addr = black_allocations_begin_ + (idx - moving_first_objs_count_) * gPageSize;
2310     reclaim_begin = idx_addr;
2311     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2312     if (first_obj != nullptr && reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2313       size_t idx_len = moving_first_objs_count_ + black_page_count_;
2314       for (size_t i = idx + 1; i < idx_len; i++) {
2315         mirror::Object* obj = first_objs_moving_space_[i].AsMirrorPtr();
2316         // A null first-object indicates that the corresponding to-space page is
2317         // not used yet. So we can compute its from-space page and use that.
2318         if (obj != first_obj) {
2319           reclaim_begin = obj != nullptr
2320                           ? AlignUp(reinterpret_cast<uint8_t*>(obj), gPageSize)
2321                           : (black_allocations_begin_ + (i - moving_first_objs_count_) * gPageSize);
2322           break;
2323         }
2324       }
2325     }
2326   } else {
2327     DCHECK_GE(pre_compact_offset_moving_space_[idx], 0u);
2328     idx_addr = bump_pointer_space_->Begin() + pre_compact_offset_moving_space_[idx] * kAlignment;
2329     reclaim_begin = idx_addr;
2330     DCHECK_LE(reclaim_begin, black_allocations_begin_);
2331     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2332     if (reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2333       DCHECK_LT(idx, moving_first_objs_count_);
2334       mirror::Object* obj = first_obj;
2335       for (size_t i = idx + 1; i < moving_first_objs_count_; i++) {
2336         obj = first_objs_moving_space_[i].AsMirrorPtr();
2337         if (first_obj != obj) {
2338           DCHECK_LT(first_obj, obj);
2339           DCHECK_LT(reclaim_begin, reinterpret_cast<uint8_t*>(obj));
2340           reclaim_begin = reinterpret_cast<uint8_t*>(obj);
2341           break;
2342         }
2343       }
2344       if (obj == first_obj) {
2345         reclaim_begin = black_allocations_begin_;
2346       }
2347     }
2348     reclaim_begin = AlignUp(reclaim_begin, gPageSize);
2349   }
2350 
2351   DCHECK_NE(reclaim_begin, nullptr);
2352   DCHECK_ALIGNED_PARAM(reclaim_begin, gPageSize);
2353   DCHECK_ALIGNED_PARAM(last_reclaimed_page_, gPageSize);
2354   // Check if the 'class_after_obj_map_' map allows pages to be freed.
2355   for (; class_after_obj_iter_ != class_after_obj_ordered_map_.rend(); class_after_obj_iter_++) {
2356     mirror::Object* klass = class_after_obj_iter_->first.AsMirrorPtr();
2357     mirror::Class* from_klass = static_cast<mirror::Class*>(GetFromSpaceAddr(klass));
2358     // Check with class' end to ensure that, if required, the entire class survives.
2359     uint8_t* klass_end = reinterpret_cast<uint8_t*>(klass) + from_klass->SizeOf<kVerifyNone>();
2360     DCHECK_LE(klass_end, last_reclaimed_page_);
2361     if (reinterpret_cast<uint8_t*>(klass_end) >= reclaim_begin) {
2362       // Found a class which is in the reclaim range.
2363       uint8_t* obj_addr = reinterpret_cast<uint8_t*>(class_after_obj_iter_->second.AsMirrorPtr());
2364       // NOTE: Don't assert that obj is of 'klass' type as klass could instead
2365       // be its super-class.
2366       if (obj_addr < idx_addr) {
2367         // Its lowest-address object is not compacted yet. Reclaim starting from
2368         // the end of this class.
2369         reclaim_begin = AlignUp(klass_end, gPageSize);
2370       } else {
2371         // Continue consuming pairs wherein the lowest address object has already
2372         // been compacted.
2373         continue;
2374       }
2375     }
2376     // All the remaining class (and thereby corresponding object) addresses are
2377     // lower than the reclaim range.
2378     break;
2379   }
2380   bool all_mapped = mode == kFallbackMode;
2381   ssize_t size = last_reclaimed_page_ - reclaim_begin;
2382   if (size > kMinFromSpaceMadviseSize) {
2383     // Map all the pages in the range.
2384     if (mode == kCopyMode && cur_page_idx < end_idx_for_mapping) {
2385       if (MapMovingSpacePages(cur_page_idx,
2386                               end_idx_for_mapping,
2387                               /*from_fault=*/false,
2388                               /*return_on_contention=*/true) ==
2389           end_idx_for_mapping - cur_page_idx) {
2390         all_mapped = true;
2391       }
2392     } else {
2393       // This is for the black-allocation pages, so that madvise is not missed.
2394       all_mapped = true;
2395     }
2396     // If not all pages are mapped, then take it as a hint that mmap_lock is
2397     // contended and hence don't madvise as that also needs the same lock.
2398     if (all_mapped) {
2399       // Retain a few pages for subsequent compactions.
2400       const ssize_t gBufferPages = 4 * gPageSize;
2401       DCHECK_LT(gBufferPages, kMinFromSpaceMadviseSize);
2402       size -= gBufferPages;
2403       uint8_t* addr = last_reclaimed_page_ - size;
2404       int behavior = minor_fault_initialized_ ? MADV_REMOVE : MADV_DONTNEED;
2405       CHECK_EQ(madvise(addr + from_space_slide_diff_, size, behavior), 0)
2406           << "madvise of from-space failed: " << strerror(errno);
2407       last_reclaimed_page_ = addr;
2408       cur_reclaimable_page_ = addr;
2409     }
2410   }
2411   CHECK_LE(reclaim_begin, last_reclaimable_page_);
2412   last_reclaimable_page_ = reclaim_begin;
2413   last_checked_reclaim_page_idx_ = idx;
2414   return all_mapped;
2415 }
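// Worked sketch of the madvise block above (abstract page count N, since the
// value of kMinFromSpaceMadviseSize is not repeated here): if
// last_reclaimed_page_ - reclaim_begin spans N pages, exceeds
// kMinFromSpaceMadviseSize and all pending to-space pages got mapped, then four
// pages are retained as future compaction buffers and the remaining N - 4
// pages are madvised starting at last_reclaimed_page_ - (N - 4) * gPageSize,
// translated into from-space via from_space_slide_diff_. last_reclaimed_page_
// then moves down to that address and cur_reclaimable_page_ is reset to it, so
// the retained pages can be handed out through cur_reclaimable_page_ in
// CompactMovingSpace().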
2416 
UpdateClassAfterObjMap()2417 void MarkCompact::UpdateClassAfterObjMap() {
2418   CHECK(class_after_obj_ordered_map_.empty());
2419   for (const auto& pair : class_after_obj_hash_map_) {
2420     auto super_class_iter = super_class_after_class_hash_map_.find(pair.first);
2421     ObjReference key = super_class_iter != super_class_after_class_hash_map_.end()
2422                        ? super_class_iter->second
2423                        : pair.first;
2424     if (std::less<mirror::Object*>{}(pair.second.AsMirrorPtr(), key.AsMirrorPtr()) &&
2425         HasAddress(key.AsMirrorPtr())) {
2426       auto [ret_iter, success] = class_after_obj_ordered_map_.try_emplace(key, pair.second);
2427       // It could fail only if the class 'key' has objects of its own that are lower in
2428       // address order, as well as objects of some of its derived classes. In that case,
2429       // choose the lowest-address object.
2430       if (!success &&
2431           std::less<mirror::Object*>{}(pair.second.AsMirrorPtr(), ret_iter->second.AsMirrorPtr())) {
2432         ret_iter->second = pair.second;
2433       }
2434     }
2435   }
2436   class_after_obj_hash_map_.clear();
2437   super_class_after_class_hash_map_.clear();
2438 }
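// Illustrative example (hypothetical addresses): if class K, or the super-class
// recorded for it in super_class_after_class_hash_map_, lives at a higher
// address than one of its instances O, the pair <K, O> makes it into the
// ordered map. If two hash-map entries collapse onto the same key K with
// objects O1 < O2, only the lower-address O1 is kept: K must stay readable in
// from-space until its lowest-address object has been compacted, which is what
// FreeFromSpacePages() checks.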
2439 
2440 template <int kMode>
CompactMovingSpace(uint8_t * page)2441 void MarkCompact::CompactMovingSpace(uint8_t* page) {
2442   // For every page we have a starting object, which may have started in some
2443   // preceding page, and an offset within that object from where we must start
2444   // copying.
2445   // Consult the live-words bitmap to copy all contiguously live words at a
2446   // time. These words may constitute multiple objects. To avoid the need for
2447   // consulting the mark-bitmap to find where the next live object starts, we
2448   // use the object-size returned by VisitRefsForCompaction.
2449   //
2450   // We do the compaction in reverse direction so that the pages containing
2451   // TLAB and latest allocations are processed first.
2452   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2453   size_t page_status_arr_len = moving_first_objs_count_ + black_page_count_;
2454   size_t idx = page_status_arr_len;
2455   uint8_t* to_space_end = bump_pointer_space_->Begin() + page_status_arr_len * gPageSize;
2456   uint8_t* shadow_space_end = nullptr;
2457   if (kMode == kMinorFaultMode) {
2458     shadow_space_end = shadow_to_space_map_.Begin() + page_status_arr_len * gPageSize;
2459   }
2460   uint8_t* pre_compact_page = black_allocations_begin_ + (black_page_count_ * gPageSize);
2461 
2462   DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
2463 
2464   UpdateClassAfterObjMap();
2465   // These variables are maintained by FreeFromSpacePages().
2466   last_reclaimed_page_ = pre_compact_page;
2467   last_reclaimable_page_ = last_reclaimed_page_;
2468   cur_reclaimable_page_ = last_reclaimed_page_;
2469   last_checked_reclaim_page_idx_ = idx;
2470   class_after_obj_iter_ = class_after_obj_ordered_map_.rbegin();
2471   // Allocated-black pages
2472   mirror::Object* next_page_first_obj = nullptr;
2473   while (idx > moving_first_objs_count_) {
2474     idx--;
2475     pre_compact_page -= gPageSize;
2476     to_space_end -= gPageSize;
2477     if (kMode == kMinorFaultMode) {
2478       shadow_space_end -= gPageSize;
2479       page = shadow_space_end;
2480     } else if (kMode == kFallbackMode) {
2481       page = to_space_end;
2482     }
2483     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2484     uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[idx];
2485     if (first_obj != nullptr) {
2486       DoPageCompactionWithStateChange<kMode>(idx,
2487                                              to_space_end,
2488                                              page,
2489                                              /*map_immediately=*/true,
2490                                              [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2491                                                SlideBlackPage(first_obj,
2492                                                               next_page_first_obj,
2493                                                               first_chunk_size,
2494                                                               pre_compact_page,
2495                                                               page,
2496                                                               kMode == kCopyMode);
2497                                              });
2498       // We are sliding here, so no point attempting to madvise for every
2499       // page. Wait for enough pages to be done.
2500       if (idx % DivideByPageSize(kMinFromSpaceMadviseSize) == 0) {
2501         FreeFromSpacePages(idx, kMode, /*end_idx_for_mapping=*/0);
2502       }
2503     }
2504     next_page_first_obj = first_obj;
2505   }
2506   DCHECK_EQ(pre_compact_page, black_allocations_begin_);
2507   // Reserved page to be used if we can't find any reclaimable page for processing.
2508   uint8_t* reserve_page = page;
2509   size_t end_idx_for_mapping = idx;
2510   while (idx > 0) {
2511     idx--;
2512     to_space_end -= gPageSize;
2513     if (kMode == kMinorFaultMode) {
2514       shadow_space_end -= gPageSize;
2515       page = shadow_space_end;
2516     } else if (kMode == kFallbackMode) {
2517       page = to_space_end;
2518     } else {
2519       DCHECK_EQ(kMode, kCopyMode);
2520       if (cur_reclaimable_page_ > last_reclaimable_page_) {
2521         cur_reclaimable_page_ -= gPageSize;
2522         page = cur_reclaimable_page_ + from_space_slide_diff_;
2523       } else {
2524         page = reserve_page;
2525       }
2526     }
2527     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2528     bool success = DoPageCompactionWithStateChange<kMode>(
2529         idx,
2530         to_space_end,
2531         page,
2532         /*map_immediately=*/page == reserve_page,
2533         [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2534           CompactPage(first_obj, pre_compact_offset_moving_space_[idx], page, kMode == kCopyMode);
2535         });
2536     if (kMode == kCopyMode && (!success || page == reserve_page) && end_idx_for_mapping - idx > 1) {
2537       // Map the pages at the following addresses, as they can't be mapped along with
2538       // the yet-to-be-compacted pages: their src-side pages won't be contiguous.
2539       MapMovingSpacePages(
2540           idx + 1, end_idx_for_mapping, /*from_fault=*/false, /*return_on_contention=*/true);
2541     }
2542     if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
2543       end_idx_for_mapping = idx;
2544     }
2545   }
2546   // map one last time to finish anything left.
2547   if (kMode == kCopyMode && end_idx_for_mapping > 0) {
2548     MapMovingSpacePages(
2549         idx, end_idx_for_mapping, /*from_fault=*/false, /*return_on_contention=*/false);
2550   }
2551   DCHECK_EQ(to_space_end, bump_pointer_space_->Begin());
2552 }
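// Illustrative layout for the loops above (hypothetical counts): with
// moving_first_objs_count_ = 100 and black_page_count_ = 20,
// page_status_arr_len is 120. The first loop walks indices 119..100
// (black-allocation pages, slid via SlideBlackPage()) and the second walks
// 99..0 (compacted via CompactPage()), so to_space_end starts at
// bump_pointer_space_->Begin() + 120 * gPageSize and ends back at Begin(),
// matching the DCHECK at the end of the function.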
2553 
MapMovingSpacePages(size_t start_idx,size_t arr_len,bool from_fault,bool return_on_contention)2554 size_t MarkCompact::MapMovingSpacePages(size_t start_idx,
2555                                         size_t arr_len,
2556                                         bool from_fault,
2557                                         bool return_on_contention) {
2558   DCHECK_LT(start_idx, arr_len);
2559   size_t arr_idx = start_idx;
2560   bool wait_for_unmapped = false;
2561   while (arr_idx < arr_len) {
2562     size_t map_count = 0;
2563     uint32_t cur_state = moving_pages_status_[arr_idx].load(std::memory_order_acquire);
2564     // Find a contiguous range that can be mapped with single ioctl.
2565     for (size_t i = arr_idx; i < arr_len; i++, map_count++) {
2566       uint32_t s = moving_pages_status_[i].load(std::memory_order_acquire);
2567       if (GetPageStateFromWord(s) != PageState::kProcessed) {
2568         break;
2569       }
2570       DCHECK_EQ((cur_state & ~kPageStateMask) + (i - arr_idx) * gPageSize, s & ~kPageStateMask);
2571     }
2572 
2573     if (map_count == 0) {
2574       if (from_fault) {
2575         bool mapped = GetPageStateFromWord(cur_state) == PageState::kProcessedAndMapped;
2576         return mapped ? 1 : 0;
2577       }
2578       // Skip the pages that this thread cannot map.
2579       for (; arr_idx < arr_len; arr_idx++) {
2580         PageState s = GetMovingPageState(arr_idx);
2581         if (s == PageState::kProcessed) {
2582           break;
2583         } else if (s != PageState::kProcessedAndMapped) {
2584           wait_for_unmapped = true;
2585         }
2586       }
2587     } else {
2588       uint32_t from_space_offset = cur_state & ~kPageStateMask;
2589       uint8_t* to_space_start = moving_space_begin_ + arr_idx * gPageSize;
2590       uint8_t* from_space_start = from_space_begin_ + from_space_offset;
2591       DCHECK_ALIGNED_PARAM(to_space_start, gPageSize);
2592       DCHECK_ALIGNED_PARAM(from_space_start, gPageSize);
2593       size_t mapped_len =
2594           CopyIoctl(to_space_start, from_space_start, map_count * gPageSize, return_on_contention);
2595       for (size_t l = 0; l < mapped_len; l += gPageSize, arr_idx++) {
2596         // Store is sufficient as anyone storing is doing it with the same value.
2597         moving_pages_status_[arr_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2598                                             std::memory_order_release);
2599       }
2600       if (from_fault) {
2601         return DivideByPageSize(mapped_len);
2602       }
2603       // The COPY ioctl can also return a smaller length if a page was found
2604       // to be already mapped, but that doesn't count as contention.
2605       if (return_on_contention && DivideByPageSize(mapped_len) < map_count && errno != EEXIST) {
2606         return arr_idx - start_idx;
2607       }
2608     }
2609   }
2610   if (wait_for_unmapped) {
2611     for (size_t i = start_idx; i < arr_len; i++) {
2612       PageState s = GetMovingPageState(i);
2613       DCHECK_GT(s, PageState::kProcessed);
2614       uint32_t backoff_count = 0;
2615       while (s != PageState::kProcessedAndMapped) {
2616         BackOff(backoff_count++);
2617         s = GetMovingPageState(i);
2618       }
2619     }
2620   }
2621   return arr_len - start_idx;
2622 }
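// Illustrative example of the batching above (hypothetical numbers): if pages
// 10..13 are all in kProcessed state with encoded from-space offsets 0x8000,
// 0x9000, 0xa000 and 0xb000 (gPageSize = 4096), the inner loop batches them
// into a single UFFDIO_COPY of 4 * gPageSize bytes. The DCHECK documents the
// invariant that consecutive processed pages carry contiguous from-space
// offsets, which is what makes the batched ioctl possible; a page in any other
// state ends the batch and is skipped or waited upon by the code that follows.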
2623 
UpdateNonMovingPage(mirror::Object * first,uint8_t * page)2624 void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) {
2625   DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + gPageSize);
2626   // For every object found in the page, visit the previous object. This ensures
2627   // that we can visit objects without having to check the page-end boundary.
2628   // Call VisitRefsForCompaction with from-space read-barrier as the klass object and
2629   // super-class loads require it.
2630   // TODO: Set kVisitNativeRoots to false once we implement concurrent
2631   // compaction
2632   mirror::Object* curr_obj = first;
2633   non_moving_space_bitmap_->VisitMarkedRange(
2634           reinterpret_cast<uintptr_t>(first) + mirror::kObjectHeaderSize,
2635           reinterpret_cast<uintptr_t>(page + gPageSize),
2636           [&](mirror::Object* next_obj) {
2637             // TODO: Once non-moving space update becomes concurrent, we'll
2638             // require fetching the from-space address of 'curr_obj' and then call
2639             // visitor on that.
2640             if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2641               RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false>
2642                       visitor(this, curr_obj, page, page + gPageSize);
2643               MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj));
2644               // Native roots shouldn't be visited as they are done when this
2645               // object's beginning was visited in the preceding page.
2646               curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(
2647                       visitor, begin_offset, MemberOffset(-1));
2648             } else {
2649               RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
2650                       visitor(this, curr_obj, page, page + gPageSize);
2651               curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
2652                                                                        MemberOffset(0),
2653                                                                        MemberOffset(-1));
2654             }
2655             curr_obj = next_obj;
2656           });
2657 
2658   MemberOffset end_offset(page + gPageSize - reinterpret_cast<uint8_t*>(curr_obj));
2659   if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2660     RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true>
2661             visitor(this, curr_obj, page, page + gPageSize);
2662     curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(
2663             visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset);
2664   } else {
2665     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
2666             visitor(this, curr_obj, page, page + gPageSize);
2667     curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor, MemberOffset(0), end_offset);
2668   }
2669 }
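// Illustrative walk-through (hypothetical layout): if a non-moving page holds
// the tail of object A (which started on the previous page) followed by marked
// objects B and C, VisitMarkedRange() above reports B and C. When B is
// reported, A (the current 'curr_obj') is visited with kCheckBegin = true
// starting at the page boundary; when C is reported, B is visited in full.
// C itself is handled after the loop with kCheckEnd = true so that references
// beyond the page's end are not visited here.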
2670 
UpdateNonMovingSpace()2671 void MarkCompact::UpdateNonMovingSpace() {
2672   TimingLogger::ScopedTiming t("(Paused)UpdateNonMovingSpace", GetTimings());
2673   // Iterating in reverse ensures that the class pointer in objects which span
2674   // across more than one page gets updated in the end. This is necessary for
2675   // VisitRefsForCompaction() to work correctly.
2676   // TODO: If and when we make non-moving space update concurrent, implement a
2677   // mechanism to remember class pointers for such objects off-heap and pass it
2678   // to VisitRefsForCompaction().
2679   uint8_t* page = non_moving_space_->Begin() + non_moving_first_objs_count_ * gPageSize;
2680   for (ssize_t i = non_moving_first_objs_count_ - 1; i >= 0; i--) {
2681     mirror::Object* obj = first_objs_non_moving_space_[i].AsMirrorPtr();
2682     page -= gPageSize;
2683     // A null entry means the page has no objects whose references need updating.
2684     if (obj != nullptr) {
2685       UpdateNonMovingPage(obj, page);
2686     }
2687   }
2688 }
2689 
UpdateMovingSpaceBlackAllocations()2690 void MarkCompact::UpdateMovingSpaceBlackAllocations() {
2691   // For sliding black pages, we need the first-object, which overlaps with the
2692   // first byte of the page. Additionally, we compute the size of first chunk of
2693   // black objects. This will suffice for most black pages. Unlike compaction
2694   // pages, here we don't need to pre-compute the offset within first-obj from
2695   // where sliding has to start. That can be calculated using the pre-compact
2696   // address of the page. Therefore, to save space, we store the first chunk's
2697   // size in black_alloc_pages_first_chunk_size_ array.
2698   // For the pages which may have holes after the first chunk, which could happen
2699   // if a new TLAB starts in the middle of the page, we mark the objects in
2700   // the mark-bitmap. So, if the first-chunk size is smaller than gPageSize,
2701   // then we use the mark-bitmap for the remainder of the page.
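  // Worked example (hypothetical layout, gPageSize = 4096): a black page fully
  // covered by one contiguous run of black objects gets
  // black_alloc_pages_first_chunk_size_[idx] == gPageSize and needs no mark-bits.
  // If instead the first run covers only, say, the first 3 KB of the page
  // because a new TLAB starts after a hole, only those 3 KB are recorded as the
  // first chunk; the objects allocated after the hole on that page get their
  // mark-bits set (see set_mark_bit below), and the mark-bitmap is consulted for
  // that remainder of the page when it is slid during compaction.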
2702   uint8_t* const begin = bump_pointer_space_->Begin();
2703   uint8_t* black_allocs = black_allocations_begin_;
2704   DCHECK_LE(begin, black_allocs);
2705   size_t consumed_blocks_count = 0;
2706   size_t first_block_size;
2707   // Needed only for a debug check at the end of the function. Hopefully the
2708   // compiler will eliminate it otherwise.
2709   size_t num_blocks = 0;
2710   // Get the list of all blocks allocated in the bump-pointer space.
2711   std::vector<size_t>* block_sizes = bump_pointer_space_->GetBlockSizes(thread_running_gc_,
2712                                                                         &first_block_size);
2713   DCHECK_LE(first_block_size, (size_t)(black_allocs - begin));
2714   if (block_sizes != nullptr) {
2715     size_t black_page_idx = moving_first_objs_count_;
2716     uint8_t* block_end = begin + first_block_size;
2717     uint32_t remaining_chunk_size = 0;
2718     uint32_t first_chunk_size = 0;
2719     mirror::Object* first_obj = nullptr;
2720     num_blocks = block_sizes->size();
2721     for (size_t block_size : *block_sizes) {
2722       block_end += block_size;
2723       // Skip the blocks that are prior to the black allocations. These will be
2724       // merged with the main-block later.
2725       if (black_allocs >= block_end) {
2726         consumed_blocks_count++;
2727         continue;
2728       }
2729       mirror::Object* obj = reinterpret_cast<mirror::Object*>(black_allocs);
2730       bool set_mark_bit = remaining_chunk_size > 0;
2731       // We don't know how many objects are allocated in the current block. When we
2732       // hit a null, assume it's the end. This works as every block is expected to
2733       // have objects allocated linearly using bump-pointer.
2734       // BumpPointerSpace::Walk() also works similarly.
2735       while (black_allocs < block_end
2736              && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
2737         // Try to keep instructions which access class instance together to
2738         // avoid reloading the pointer from object.
2739         size_t obj_size = obj->SizeOf();
2740         bytes_scanned_ += obj_size;
2741         obj_size = RoundUp(obj_size, kAlignment);
2742         UpdateClassAfterObjectMap(obj);
2743         if (first_obj == nullptr) {
2744           first_obj = obj;
2745         }
2746         // We only need the mark-bitmap in the pages wherein a new TLAB starts in
2747         // the middle of the page.
2748         if (set_mark_bit) {
2749           moving_space_bitmap_->Set(obj);
2750         }
2751         // Handle objects which cross page boundary, including objects larger
2752         // than page size.
2753         if (remaining_chunk_size + obj_size >= gPageSize) {
2754           set_mark_bit = false;
2755           first_chunk_size += gPageSize - remaining_chunk_size;
2756           remaining_chunk_size += obj_size;
2757           // We should not store first-object and remaining_chunk_size if there were
2758           // unused bytes before this TLAB, in which case we must have already
2759           // stored the values (below).
2760           if (black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2761             black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2762             first_objs_moving_space_[black_page_idx].Assign(first_obj);
2763           }
2764           black_page_idx++;
2765           remaining_chunk_size -= gPageSize;
2766           // Consume an object larger than page size.
2767           while (remaining_chunk_size >= gPageSize) {
2768             black_alloc_pages_first_chunk_size_[black_page_idx] = gPageSize;
2769             first_objs_moving_space_[black_page_idx].Assign(obj);
2770             black_page_idx++;
2771             remaining_chunk_size -= gPageSize;
2772           }
2773           first_obj = remaining_chunk_size > 0 ? obj : nullptr;
2774           first_chunk_size = remaining_chunk_size;
2775         } else {
2776           DCHECK_LE(first_chunk_size, remaining_chunk_size);
2777           first_chunk_size += obj_size;
2778           remaining_chunk_size += obj_size;
2779         }
2780         black_allocs += obj_size;
2781         obj = reinterpret_cast<mirror::Object*>(black_allocs);
2782       }
2783       DCHECK_LE(black_allocs, block_end);
2784       DCHECK_LT(remaining_chunk_size, gPageSize);
2785       // consume the unallocated portion of the block
2786       if (black_allocs < block_end) {
2787         // first-chunk of the current page ends here. Store it.
2788         if (first_chunk_size > 0 && black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2789           black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2790           first_objs_moving_space_[black_page_idx].Assign(first_obj);
2791         }
2792         first_chunk_size = 0;
2793         first_obj = nullptr;
2794         size_t page_remaining = gPageSize - remaining_chunk_size;
2795         size_t block_remaining = block_end - black_allocs;
2796         if (page_remaining <= block_remaining) {
2797           block_remaining -= page_remaining;
2798           // current page and the subsequent empty pages in the block
2799           black_page_idx += 1 + DivideByPageSize(block_remaining);
2800           remaining_chunk_size = ModuloPageSize(block_remaining);
2801         } else {
2802           remaining_chunk_size += block_remaining;
2803         }
2804         black_allocs = block_end;
2805       }
2806     }
2807     if (black_page_idx < DivideByPageSize(bump_pointer_space_->Size())) {
2808       // Store the leftover first-chunk, if any, and update page index.
2809       if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) {
2810         black_page_idx++;
2811       } else if (first_chunk_size > 0) {
2812         black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2813         first_objs_moving_space_[black_page_idx].Assign(first_obj);
2814         black_page_idx++;
2815       }
2816     }
2817     black_page_count_ = black_page_idx - moving_first_objs_count_;
2818     delete block_sizes;
2819   }
2820   // Update bump-pointer space by consuming all the pre-black blocks into the
2821   // main one.
2822   bump_pointer_space_->SetBlockSizes(thread_running_gc_,
2823                                      post_compact_end_ - begin,
2824                                      consumed_blocks_count);
2825   if (kIsDebugBuild) {
2826     size_t moving_space_size = bump_pointer_space_->Size();
2827     size_t los_size = 0;
2828     if (heap_->GetLargeObjectsSpace()) {
2829       los_size = heap_->GetLargeObjectsSpace()->GetBytesAllocated();
2830     }
2831     // The moving-space size is already updated to post-compact size in SetBlockSizes above.
2832     // Also, bytes-allocated has already been adjusted with the large-object space's freed-bytes
2833     // in Sweep(), but not with moving-space freed-bytes.
2834     CHECK_GE(heap_->GetBytesAllocated() - black_objs_slide_diff_, moving_space_size + los_size)
2835         << " moving-space size:" << moving_space_size
2836         << " moving-space bytes-freed:" << black_objs_slide_diff_
2837         << " large-object-space size:" << los_size
2838         << " large-object-space bytes-freed:" << GetCurrentIteration()->GetFreedLargeObjectBytes()
2839         << " num-tlabs-merged:" << consumed_blocks_count
2840         << " main-block-size:" << (post_compact_end_ - begin)
2841         << " total-tlabs-moving-space:" << num_blocks;
2842   }
2843 }
2844 
UpdateNonMovingSpaceBlackAllocations()2845 void MarkCompact::UpdateNonMovingSpaceBlackAllocations() {
2846   accounting::ObjectStack* stack = heap_->GetAllocationStack();
2847   const StackReference<mirror::Object>* limit = stack->End();
2848   uint8_t* const space_begin = non_moving_space_->Begin();
2849   for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
2850     mirror::Object* obj = it->AsMirrorPtr();
2851     if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
2852       non_moving_space_bitmap_->Set(obj);
2853       // Clear so that we don't try to set the bit again in the next GC-cycle.
2854       it->Clear();
2855       size_t idx = DivideByPageSize(reinterpret_cast<uint8_t*>(obj) - space_begin);
2856       uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), gPageSize);
2857       mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
2858       if (first_obj == nullptr
2859           || (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) {
2860         first_objs_non_moving_space_[idx].Assign(obj);
2861       }
2862       mirror::Object* next_page_first_obj = first_objs_non_moving_space_[++idx].AsMirrorPtr();
2863       uint8_t* next_page_begin = page_begin + gPageSize;
2864       if (next_page_first_obj == nullptr
2865           || reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) {
2866         size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
2867         uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + obj_size;
2868         while (next_page_begin < obj_end) {
2869           first_objs_non_moving_space_[idx++].Assign(obj);
2870           next_page_begin += gPageSize;
2871         }
2872       }
2873       // update first_objs count in case we went past non_moving_first_objs_count_
2874       non_moving_first_objs_count_ = std::max(non_moving_first_objs_count_, idx);
2875     }
2876   }
2877 }
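// Illustrative example (hypothetical layout, gPageSize = 4096): for a black
// allocated 10 KB object starting 1 KB into non-moving page P, the loop above
// records it as P's first-object (unless P already has an earlier one) and the
// inner while loop also records it for pages P+1 and P+2, which it partly
// covers, so that UpdateNonMovingPage() can always start visiting from the
// object overlapping each page's first byte.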
2878 
2879 class MarkCompact::ImmuneSpaceUpdateObjVisitor {
2880  public:
ImmuneSpaceUpdateObjVisitor(MarkCompact * collector)2881   explicit ImmuneSpaceUpdateObjVisitor(MarkCompact* collector) : collector_(collector) {}
2882 
operator ()(mirror::Object * obj) const2883   void operator()(mirror::Object* obj) const ALWAYS_INLINE REQUIRES(Locks::mutator_lock_) {
2884     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_,
2885                                                                         obj,
2886                                                                         /*begin_*/nullptr,
2887                                                                         /*end_*/nullptr);
2888     obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2889         visitor, MemberOffset(0), MemberOffset(-1));
2890   }
2891 
Callback(mirror::Object * obj,void * arg)2892   static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) {
2893     reinterpret_cast<ImmuneSpaceUpdateObjVisitor*>(arg)->operator()(obj);
2894   }
2895 
2896  private:
2897   MarkCompact* const collector_;
2898 };
2899 
2900 class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor {
2901  public:
ClassLoaderRootsUpdater(MarkCompact * collector)2902   explicit ClassLoaderRootsUpdater(MarkCompact* collector)
2903       : collector_(collector),
2904         moving_space_begin_(collector->moving_space_begin_),
2905         moving_space_end_(collector->moving_space_end_) {}
2906 
Visit(ObjPtr<mirror::ClassLoader> class_loader)2907   void Visit(ObjPtr<mirror::ClassLoader> class_loader) override
2908       REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) {
2909     ClassTable* const class_table = class_loader->GetClassTable();
2910     if (class_table != nullptr) {
2911       // Classes are updated concurrently.
2912       class_table->VisitRoots(*this, /*skip_classes=*/true);
2913     }
2914   }
2915 
VisitRootIfNonNull(mirror::CompressedReference<mirror::Object> * root) const2916   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
2917       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2918     if (!root->IsNull()) {
2919       VisitRoot(root);
2920     }
2921   }
2922 
VisitRoot(mirror::CompressedReference<mirror::Object> * root) const2923   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
2924       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2925     collector_->UpdateRoot(
2926         root, moving_space_begin_, moving_space_end_, RootInfo(RootType::kRootVMInternal));
2927   }
2928 
2929  private:
2930   MarkCompact* collector_;
2931   uint8_t* const moving_space_begin_;
2932   uint8_t* const moving_space_end_;
2933 };
2934 
2935 class MarkCompact::LinearAllocPageUpdater {
2936  public:
LinearAllocPageUpdater(MarkCompact * collector)2937   explicit LinearAllocPageUpdater(MarkCompact* collector)
2938       : collector_(collector),
2939         moving_space_begin_(collector->moving_space_begin_),
2940         moving_space_end_(collector->moving_space_end_),
2941         last_page_touched_(false) {}
2942 
2943   // Update a page in multi-object arena.
MultiObjectArena(uint8_t * page_begin,uint8_t * first_obj)2944   void MultiObjectArena(uint8_t* page_begin, uint8_t* first_obj)
2945       REQUIRES_SHARED(Locks::mutator_lock_) {
2946     DCHECK(first_obj != nullptr);
2947     DCHECK_ALIGNED_PARAM(page_begin, gPageSize);
2948     uint8_t* page_end = page_begin + gPageSize;
2949     uint32_t obj_size;
2950     for (uint8_t* byte = first_obj; byte < page_end;) {
2951       TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte);
2952       obj_size = header->GetSize();
2953       if (UNLIKELY(obj_size == 0)) {
2954         // No more objects in this page to visit.
2955         last_page_touched_ = byte >= page_begin;
2956         return;
2957       }
2958       uint8_t* obj = byte + sizeof(TrackingHeader);
2959       uint8_t* obj_end = byte + obj_size;
2960       if (header->Is16Aligned()) {
2961         obj = AlignUp(obj, 16);
2962       }
2963       uint8_t* begin_boundary = std::max(obj, page_begin);
2964       uint8_t* end_boundary = std::min(obj_end, page_end);
2965       if (begin_boundary < end_boundary) {
2966         VisitObject(header->GetKind(), obj, begin_boundary, end_boundary);
2967       }
2968       if (ArenaAllocator::IsRunningOnMemoryTool()) {
2969         obj_size += ArenaAllocator::kMemoryToolRedZoneBytes;
2970       }
2971       byte += RoundUp(obj_size, LinearAlloc::kAlignment);
2972     }
2973     last_page_touched_ = true;
2974   }
2975 
2976   // This version is only used for cases where the entire page is filled with
2977   // GC-roots. For example, class-table and intern-table.
SingleObjectArena(uint8_t * page_begin,size_t page_size)2978   void SingleObjectArena(uint8_t* page_begin, size_t page_size)
2979       REQUIRES_SHARED(Locks::mutator_lock_) {
2980     static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>));
2981     DCHECK_ALIGNED(page_begin, kAlignment);
2982     // Least significant bits are used by class-table.
2983     static constexpr uint32_t kMask = kObjectAlignment - 1;
2984     size_t num_roots = page_size / sizeof(GcRoot<mirror::Object>);
2985     uint32_t* root_ptr = reinterpret_cast<uint32_t*>(page_begin);
2986     for (size_t i = 0; i < num_roots; root_ptr++, i++) {
2987       uint32_t word = *root_ptr;
2988       if (word != 0) {
2989         uint32_t lsbs = word & kMask;
2990         word &= ~kMask;
2991         VisitRootIfNonNull(reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(&word));
2992         *root_ptr = word | lsbs;
2993         last_page_touched_ = true;
2994       }
2995     }
2996   }
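  // Illustrative example for SingleObjectArena() above (hypothetical word
  // value): a class-table slot may hold a compressed reference with bookkeeping
  // bits in its low bits, e.g. word = 0x12345670 | 0x3. The loop strips the low
  // kMask bits, lets VisitRootIfNonNull() update the reference 0x12345670 if it
  // moved, and then ORs the saved bits back in, so the class-table's tagging
  // survives compaction.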
2997 
WasLastPageTouched() const2998   bool WasLastPageTouched() const { return last_page_touched_; }
2999 
VisitRootIfNonNull(mirror::CompressedReference<mirror::Object> * root) const3000   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
3001       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
3002     if (!root->IsNull()) {
3003       VisitRoot(root);
3004     }
3005   }
3006 
VisitRoot(mirror::CompressedReference<mirror::Object> * root) const3007   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
3008       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
3009     mirror::Object* old_ref = root->AsMirrorPtr();
3010     DCHECK_NE(old_ref, nullptr);
3011     if (MarkCompact::HasAddress(old_ref, moving_space_begin_, moving_space_end_)) {
3012       mirror::Object* new_ref = old_ref;
3013       if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) {
3014         new_ref = collector_->PostCompactBlackObjAddr(old_ref);
3015       } else if (collector_->live_words_bitmap_->Test(old_ref)) {
3016         DCHECK(collector_->moving_space_bitmap_->Test(old_ref))
3017             << "ref:" << old_ref << " root:" << root;
3018         new_ref = collector_->PostCompactOldObjAddr(old_ref);
3019       }
3020       if (old_ref != new_ref) {
3021         root->Assign(new_ref);
3022       }
3023     }
3024   }
3025 
3026  private:
VisitObject(LinearAllocKind kind,void * obj,uint8_t * start_boundary,uint8_t * end_boundary) const3027   void VisitObject(LinearAllocKind kind,
3028                    void* obj,
3029                    uint8_t* start_boundary,
3030                    uint8_t* end_boundary) const ALWAYS_INLINE
3031       REQUIRES_SHARED(Locks::mutator_lock_) {
3032     switch (kind) {
3033       case LinearAllocKind::kNoGCRoots:
3034         break;
3035       case LinearAllocKind::kGCRootArray:
3036         {
3037           GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary);
3038           GcRoot<mirror::Object>* last = reinterpret_cast<GcRoot<mirror::Object>*>(end_boundary);
3039           for (; root < last; root++) {
3040             VisitRootIfNonNull(root->AddressWithoutBarrier());
3041           }
3042         }
3043         break;
3044       case LinearAllocKind::kArtMethodArray:
3045         {
3046           LengthPrefixedArray<ArtMethod>* array = static_cast<LengthPrefixedArray<ArtMethod>*>(obj);
3047           // Old methods are clobbered in debug builds. Check size to confirm if the array
3048           // has any GC roots to visit. See ClassLinker::LinkMethodsHelper::ClobberOldMethods()
3049           if (array->size() > 0) {
3050             if (collector_->pointer_size_ == PointerSize::k64) {
3051               ArtMethod::VisitArrayRoots<PointerSize::k64>(
3052                   *this, start_boundary, end_boundary, array);
3053             } else {
3054               DCHECK_EQ(collector_->pointer_size_, PointerSize::k32);
3055               ArtMethod::VisitArrayRoots<PointerSize::k32>(
3056                   *this, start_boundary, end_boundary, array);
3057             }
3058           }
3059         }
3060         break;
3061       case LinearAllocKind::kArtMethod:
3062         ArtMethod::VisitRoots(*this, start_boundary, end_boundary, static_cast<ArtMethod*>(obj));
3063         break;
3064       case LinearAllocKind::kArtFieldArray:
3065         ArtField::VisitArrayRoots(*this,
3066                                   start_boundary,
3067                                   end_boundary,
3068                                   static_cast<LengthPrefixedArray<ArtField>*>(obj));
3069         break;
3070       case LinearAllocKind::kDexCacheArray:
3071         {
3072           mirror::DexCachePair<mirror::Object>* first =
3073               reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(start_boundary);
3074           mirror::DexCachePair<mirror::Object>* last =
3075               reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary);
3076           mirror::DexCache::VisitDexCachePairRoots(*this, first, last);
3077       }
3078     }
3079   }
3080 
3081   MarkCompact* const collector_;
3082   // Cache to speed up checking if GC-root is in moving space or not.
3083   uint8_t* const moving_space_begin_;
3084   uint8_t* const moving_space_end_;
3085   // Whether the last page was touched or not.
3086   bool last_page_touched_ = false;
3087 };
3088 
UpdateClassTableClasses(Runtime * runtime,bool immune_class_table_only)3089 void MarkCompact::UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) {
3090   // If the process is debuggable then redefinition is allowed, which may mean
3091   // pre-zygote-fork class-tables may have pointers to classes in the moving-space.
3092   // So visit classes from class-sets that are not in the linear-alloc arena-pool.
3093   if (UNLIKELY(runtime->IsJavaDebuggableAtInit())) {
3094     ClassLinker* linker = runtime->GetClassLinker();
3095     ClassLoaderRootsUpdater updater(this);
3096     GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
3097     auto cond = [this, pool, immune_class_table_only](ClassTable::ClassSet& set) -> bool {
3098       if (!set.empty()) {
3099         return immune_class_table_only ?
3100                immune_spaces_.ContainsObject(reinterpret_cast<mirror::Object*>(&*set.begin())) :
3101                !pool->Contains(reinterpret_cast<void*>(&*set.begin()));
3102       }
3103       return false;
3104     };
3105     linker->VisitClassTables([cond, &updater](ClassTable* table)
3106                                  REQUIRES_SHARED(Locks::mutator_lock_) {
3107                                table->VisitClassesIfConditionMet(cond, updater);
3108                              });
3109     ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
3110     linker->GetBootClassTable()->VisitClassesIfConditionMet(cond, updater);
3111   }
3112 }
3113 
CompactionPause()3114 void MarkCompact::CompactionPause() {
3115   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3116   Runtime* runtime = Runtime::Current();
3117   non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap();
3118   if (kIsDebugBuild) {
3119     DCHECK_EQ(thread_running_gc_, Thread::Current());
3120     stack_low_addr_ = thread_running_gc_->GetStackEnd();
3121     stack_high_addr_ =
3122         reinterpret_cast<char*>(stack_low_addr_) + thread_running_gc_->GetStackSize();
3123   }
3124   {
3125     TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings());
3126     ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
3127     // Refresh data-structures to catch-up on allocations that may have
3128     // happened since marking-phase pause.
3129     // There could be several TLABs that got allocated since marking pause. We
3130     // don't want to compact them and instead update the TLAB info in TLS and
3131     // let mutators continue to use the TLABs.
3132     // We need to set all the bits in live-words bitmap corresponding to allocated
3133     // objects. Also, we need to find the objects that are overlapping with
3134     // page-begin boundaries. Unlike objects allocated before
3135     // black_allocations_begin_, which can be identified via mark-bitmap, we can get
3136     // this info only via walking the space past black_allocations_begin_, which
3137     // involves fetching object size.
3138     // TODO: We can reduce the time spent on this in a pause by performing one
3139     // round of this concurrently prior to the pause.
3140     UpdateMovingSpaceBlackAllocations();
3141     // Iterate over the allocation_stack_, for every object in the non-moving
3142     // space:
3143     // 1. Mark the object in live bitmap
3144     // 2. Erase the object from allocation stack
3145     // 3. In the corresponding page, if the first-object vector needs updating
3146     // then do so.
3147     UpdateNonMovingSpaceBlackAllocations();
3148     // This store is visible to mutator (or uffd worker threads) as the mutator
3149     // lock's unlock guarantees that.
3150     compacting_ = true;
3151     // Start updating roots and system weaks now.
3152     heap_->GetReferenceProcessor()->UpdateRoots(this);
3153   }
3154   {
3155     // TODO: Updating the immune spaces has to happen either before or after
3156     // remapping pre-compact pages to from-space. And depending on when it's
3157     // done, we have to invoke VisitRefsForCompaction() with or without
3158     // read-barrier.
3159     TimingLogger::ScopedTiming t2("(Paused)UpdateImmuneSpaces", GetTimings());
3160     accounting::CardTable* const card_table = heap_->GetCardTable();
3161     for (auto& space : immune_spaces_.GetSpaces()) {
3162       DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
3163       accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
3164       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
3165       // Having zygote-space indicates that the first zygote fork has taken
3166       // place and that the classes/dex-caches in immune-spaces may have allocations
3167       // (ArtMethod/ArtField arrays, dex-cache array, etc.) in private-anonymous
3168       // mappings that are not visited via userfaultfd. Visit them here.
3169       ImmuneSpaceUpdateObjVisitor visitor(this);
3170       if (table != nullptr) {
3171         table->ProcessCards();
3172         table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
3173       } else {
3174         WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
3175         card_table->Scan<false>(
3176             live_bitmap,
3177             space->Begin(),
3178             space->Limit(),
3179             visitor,
3180             accounting::CardTable::kCardDirty - 1);
3181       }
3182     }
3183   }
3184 
3185   {
3186     TimingLogger::ScopedTiming t2("(Paused)UpdateRoots", GetTimings());
3187     runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
3188     runtime->VisitNonThreadRoots(this);
3189     {
3190       ClassLinker* linker = runtime->GetClassLinker();
3191       ClassLoaderRootsUpdater updater(this);
3192       ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
3193       linker->VisitClassLoaders(&updater);
3194       linker->GetBootClassTable()->VisitRoots(updater, /*skip_classes=*/true);
3195     }
3196     SweepSystemWeaks(thread_running_gc_, runtime, /*paused=*/true);
3197 
3198     bool has_zygote_space = heap_->HasZygoteSpace();
3199     GcVisitedArenaPool* arena_pool =
3200         static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
3201     // Update immune/pre-zygote class-tables in case class redefinition took
3202     // place. pre-zygote class-tables that are not in immune spaces are updated
3203     // below if we are in fallback-mode or if there is no zygote space. So in
3204     // that case only visit class-tables that are there in immune-spaces.
3205     UpdateClassTableClasses(runtime, uffd_ == kFallbackMode || !has_zygote_space);
3206 
3207     // Acquire arena-pool's lock, which should be released after the pool is
3208     // userfaultfd registered. This is to ensure that no new arenas are
3209     // allocated and used in between. Since they will not be captured in
3210     // linear_alloc_arenas_ below, we will miss updating their pages. The same
3211     // reason also applies to new allocations within the existing arena which
3212     // may change last_byte.
3213     // Since we are in a STW pause, this shouldn't happen anyway, but holding
3214     // the lock confirms it.
3215     // TODO (b/305779657): Replace with ExclusiveTryLock() and assert that it
3216     // doesn't fail once it is available for ReaderWriterMutex.
3217     WriterMutexLock pool_wmu(thread_running_gc_, arena_pool->GetLock());
3218 
3219     // TODO: Find out why it's not sufficient to visit native roots of immune
3220     // spaces, and why all the pre-zygote fork arenas have to be linearly updated.
3221     // Is it possible that some native root starts getting pointed to by some object
3222     // in moving space after fork? Or are we missing a write-barrier somewhere
3223     // when a native root is updated?
3224     auto arena_visitor = [this](uint8_t* page_begin, uint8_t* first_obj, size_t page_size)
3225                              REQUIRES_SHARED(Locks::mutator_lock_) {
3226                            LinearAllocPageUpdater updater(this);
3227                            if (first_obj != nullptr) {
3228                              updater.MultiObjectArena(page_begin, first_obj);
3229                            } else {
3230                              updater.SingleObjectArena(page_begin, page_size);
3231                            }
3232                          };
3233     if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
3234       // Besides fallback-mode, visit linear-alloc space in the pause for zygote
3235       // processes prior to first fork (that's when zygote space gets created).
3236       if (kIsDebugBuild && IsValidFd(uffd_)) {
3237         // All arenas allocated so far are expected to be pre-zygote fork.
3238         arena_pool->ForEachAllocatedArena(
3239             [](const TrackedArena& arena)
3240                 REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
3241       }
3242       arena_pool->VisitRoots(arena_visitor);
3243     } else {
3244       // Inform the arena-pool that compaction is going on. So the TrackedArena
3245       // objects corresponding to the arenas that are freed shouldn't be deleted
3246       // immediately. We will do that in FinishPhase(). This is to avoid ABA
3247       // problem.
3248       arena_pool->DeferArenaFreeing();
3249       arena_pool->ForEachAllocatedArena(
3250           [this, arena_visitor, has_zygote_space](const TrackedArena& arena)
3251               REQUIRES_SHARED(Locks::mutator_lock_) {
3252             // The pre-zygote fork arenas are not visited concurrently in the
3253             // zygote children processes. The native roots of the dirty objects
3254             // are visited during immune space visit below.
3255             if (!arena.IsPreZygoteForkArena()) {
3256               uint8_t* last_byte = arena.GetLastUsedByte();
3257               auto ret = linear_alloc_arenas_.insert({&arena, last_byte});
3258               CHECK(ret.second);
3259             } else if (!arena.IsSingleObjectArena() || !has_zygote_space) {
3260               // Pre-zygote class-table and intern-table don't need to be updated.
3261               // TODO: Explore the possibility of using /proc/self/pagemap to
3262               // fetch which pages in these arenas are private-dirty and then only
3263               // visit those pages. To optimize it further, we can keep all
3264               // pre-zygote arenas in a single memory range so that just one read
3265               // from pagemap is sufficient.
3266               arena.VisitRoots(arena_visitor);
3267             }
3268           });
3269     }
3270     if (use_uffd_sigbus_) {
3271       // Release order wrt mutator threads' SIGBUS handler load.
3272       sigbus_in_progress_count_.store(0, std::memory_order_release);
3273     }
3274     KernelPreparation();
3275   }
3276 
3277   UpdateNonMovingSpace();
3278   // fallback mode
3279   if (uffd_ == kFallbackMode) {
3280     CompactMovingSpace<kFallbackMode>(nullptr);
3281 
3282     int32_t freed_bytes = black_objs_slide_diff_;
3283     bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
3284     RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
3285   } else {
3286     DCHECK_EQ(compaction_in_progress_count_.load(std::memory_order_relaxed), 0u);
3287     DCHECK_EQ(compaction_buffer_counter_.load(std::memory_order_relaxed), 1);
3288     if (!use_uffd_sigbus_) {
3289       // We must start worker threads before resuming mutators to avoid deadlocks.
3290       heap_->GetThreadPool()->StartWorkers(thread_running_gc_);
3291     }
3292   }
3293   stack_low_addr_ = nullptr;
3294 }
3295 
3296 void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr,
3297                                             uint8_t* from_addr,
3298                                             size_t map_size,
3299                                             int fd,
3300                                             uint8_t* shadow_addr) {
3301   int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
3302   if (gHaveMremapDontunmap) {
3303     mremap_flags |= MREMAP_DONTUNMAP;
3304   }
3305 
3306   void* ret = mremap(to_addr, map_size, map_size, mremap_flags, from_addr);
3307   CHECK_EQ(ret, static_cast<void*>(from_addr))
3308       << "mremap to move pages failed: " << strerror(errno)
3309       << ". space-addr=" << reinterpret_cast<void*>(to_addr) << " size=" << PrettySize(map_size);
3310 
3311   if (shadow_addr != nullptr) {
3312     DCHECK_EQ(fd, kFdUnused);
3313     DCHECK(gHaveMremapDontunmap);
3314     ret = mremap(shadow_addr, map_size, map_size, mremap_flags, to_addr);
3315     CHECK_EQ(ret, static_cast<void*>(to_addr))
3316         << "mremap from shadow to to-space map failed: " << strerror(errno);
3317   } else if (!gHaveMremapDontunmap || fd > kFdUnused) {
3318     // Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap
3319     // the moving space again.
3320     int mmap_flags = MAP_FIXED;
3321     if (fd == kFdUnused) {
3322       // Use MAP_FIXED_NOREPLACE so that, if someone else reserves the 'to_addr'
3323       // mapping in the meantime (which can happen when MREMAP_DONTUNMAP isn't
3324       // available), we avoid unmapping someone else's mapping and causing
3325       // crashes elsewhere.
3326       mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
3327       // On some platforms MAP_ANONYMOUS expects fd to be -1.
3328       fd = -1;
3329     } else if (IsValidFd(fd)) {
3330       mmap_flags |= MAP_SHARED;
3331     } else {
3332       DCHECK_EQ(fd, kFdSharedAnon);
3333       mmap_flags |= MAP_SHARED | MAP_ANONYMOUS;
3334     }
3335     ret = mmap(to_addr, map_size, PROT_READ | PROT_WRITE, mmap_flags, fd, 0);
3336     CHECK_EQ(ret, static_cast<void*>(to_addr))
3337         << "mmap for moving space failed: " << strerror(errno);
3338   }
3339 }
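
// The following is an illustrative, self-contained sketch (not used by the
// collector) of the mremap-based "flip" that KernelPrepareRangeForUffd()
// performs on the moving space: the pages backing 'src' are moved to 'dst' in
// a single syscall, and MREMAP_DONTUNMAP leaves the source VMA in place but
// empty, ready to be userfaultfd-registered. It assumes a kernel with
// MREMAP_DONTUNMAP support; the helper name is hypothetical.
[[maybe_unused]] static bool SketchMremapFlip(size_t len) {
  void* src = mmap(nullptr, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  void* dst = mmap(nullptr, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (src == MAP_FAILED || dst == MAP_FAILED) {
    return false;
  }
  // MREMAP_FIXED moves the pages to 'dst' (replacing the reservation there),
  // while MREMAP_DONTUNMAP keeps 'src' mapped without any backing pages, so
  // the next access to 'src' faults and can be served via userfaultfd.
  void* ret = mremap(src, len, len, MREMAP_MAYMOVE | MREMAP_FIXED | MREMAP_DONTUNMAP, dst);
  bool ok = (ret == dst);
  munmap(src, len);
  munmap(dst, len);
  return ok;
}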
3340 
3341 void MarkCompact::KernelPreparation() {
3342   TimingLogger::ScopedTiming t("(Paused)KernelPreparation", GetTimings());
3343   uint8_t* moving_space_begin = bump_pointer_space_->Begin();
3344   size_t moving_space_size = bump_pointer_space_->Capacity();
3345   int mode = kCopyMode;
3346   size_t moving_space_register_sz = (moving_first_objs_count_ + black_page_count_) * gPageSize;
3347   DCHECK_LE(moving_space_register_sz, moving_space_size);
3348   if (minor_fault_initialized_) {
3349     if (shadow_to_space_map_.IsValid()) {
3350       size_t shadow_size = shadow_to_space_map_.Size();
3351       void* addr = shadow_to_space_map_.Begin();
3352       if (shadow_size < moving_space_register_sz) {
3353         addr = mremap(addr,
3354                       shadow_size,
3355                       moving_space_register_sz,
3356                       // Don't allow moving with obj-ptr poisoning as the
3357                       // mapping needs to be in <4GB address space.
3358                       kObjPtrPoisoning ? 0 : MREMAP_MAYMOVE,
3359                       /*new_address=*/nullptr);
3360         if (addr != MAP_FAILED) {
3361           // Succeeded in expanding the mapping. Update the MemMap entry for shadow map.
3362           MemMap temp = MemMap::MapPlaceholder(
3363               "moving-space-shadow", static_cast<uint8_t*>(addr), moving_space_register_sz);
3364           std::swap(shadow_to_space_map_, temp);
3365         }
3366       }
3367       if (addr != MAP_FAILED) {
3368         mode = kMinorFaultMode;
3369       } else {
3370         // We are not going to use shadow map. So protect it to catch any
3371         // potential bugs.
3372         DCHECK_EQ(mprotect(shadow_to_space_map_.Begin(), shadow_to_space_map_.Size(), PROT_NONE), 0)
3373             << "mprotect failed: " << strerror(errno);
3374       }
3375     }
3376   }
3377 
3378   bool map_shared =
3379       minor_fault_initialized_ || (!Runtime::Current()->IsZygote() && uffd_minor_fault_supported_);
3380   uint8_t* shadow_addr = nullptr;
3381   if (moving_to_space_fd_ == kFdUnused && map_shared) {
3382     DCHECK(gHaveMremapDontunmap);
3383     DCHECK(shadow_to_space_map_.IsValid());
3384     DCHECK_EQ(shadow_to_space_map_.Size(), moving_space_size);
3385     shadow_addr = shadow_to_space_map_.Begin();
3386   }
3387 
3388   KernelPrepareRangeForUffd(moving_space_begin,
3389                             from_space_begin_,
3390                             moving_space_size,
3391                             moving_to_space_fd_,
3392                             shadow_addr);
3393 
3394   if (IsValidFd(uffd_)) {
3395     if (moving_space_register_sz > 0) {
3396       // mremap clears 'anon_vma' field of anonymous mappings. If we
3397       // uffd-register only the used portion of the space, then the vma gets
3398       // split (between used and unused portions) and as soon as pages are
3399       // mapped to the vmas, they get different `anon_vma` assigned, which
3400       // ensures that the two vmas cannot merge after we uffd-unregister the
3401       // used portion. OTOH, registering the entire space avoids the split, but
3402       // unnecessarily causes userfaults on allocations.
3403       // By faulting-in a page we force the kernel to allocate 'anon_vma' *before*
3404       // the vma-split in uffd-register. This ensures that when we unregister
3405       // the used portion after compaction, the two split vmas merge. This is
3406       // necessary for the mremap of the next GC cycle to not fail due to having
3407       // more than one vma in the source range.
3408       if (moving_space_register_sz < moving_space_size) {
3409         *const_cast<volatile uint8_t*>(moving_space_begin + moving_space_register_sz) = 0;
3410       }
3411       // Register the moving space with userfaultfd.
3412       RegisterUffd(moving_space_begin, moving_space_register_sz, mode);
3413     }
3414     // Prepare linear-alloc for concurrent compaction.
3415     for (auto& data : linear_alloc_spaces_data_) {
3416       bool mmap_again = map_shared && !data.already_shared_;
3417       DCHECK_EQ(static_cast<ssize_t>(data.shadow_.Size()), data.end_ - data.begin_);
3418       // There could be threads running in suspended mode when the compaction
3419       // pause is being executed. In order to make the userfaultfd setup atomic,
3420       // the registration has to be done *before* moving the pages to shadow map.
3421       if (!mmap_again) {
3422         // See the comment in the constructor as to why it's conditionally done.
3423         RegisterUffd(data.begin_,
3424                      data.shadow_.Size(),
3425                      minor_fault_initialized_ ? kMinorFaultMode : kCopyMode);
3426       }
3427       KernelPrepareRangeForUffd(data.begin_,
3428                                 data.shadow_.Begin(),
3429                                 data.shadow_.Size(),
3430                                 mmap_again ? kFdSharedAnon : kFdUnused);
3431       if (mmap_again) {
3432         data.already_shared_ = true;
3433         RegisterUffd(data.begin_,
3434                      data.shadow_.Size(),
3435                      minor_fault_initialized_ ? kMinorFaultMode : kCopyMode);
3436       }
3437     }
3438   }
3439   if (map_shared) {
3440     // Start mapping linear-alloc MAP_SHARED only after the compaction pause of
3441     // the first GC in non-zygote processes. This is the GC which sets up
3442     // mappings for using minor-fault in future. Up to this point we run
3443     // userfaultfd in copy-mode, which requires the mappings (of linear-alloc)
3444     // to be MAP_PRIVATE.
3445     map_linear_alloc_shared_ = true;
3446   }
3447 }
3448 
3449 template <int kMode>
3450 void MarkCompact::ConcurrentCompaction(uint8_t* buf) {
3451   DCHECK_NE(kMode, kFallbackMode);
3452   DCHECK(kMode != kCopyMode || buf != nullptr);
3453   size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
3454   while (true) {
3455     struct uffd_msg msg;
3456     ssize_t nread = read(uffd_, &msg, sizeof(msg));
3457     CHECK_GT(nread, 0);
3458     CHECK_EQ(msg.event, UFFD_EVENT_PAGEFAULT);
3459     DCHECK_EQ(nread, static_cast<ssize_t>(sizeof(msg)));
3460     uint8_t* fault_addr = reinterpret_cast<uint8_t*>(msg.arg.pagefault.address);
3461     if (fault_addr == conc_compaction_termination_page_) {
3462       // The counter doesn't need to be updated atomically as only one thread
3463       // would wake up against the gc-thread's load to this fault_addr. In fact,
3464       // the other threads would wake up serially because every exiting thread
3465       // will wake up gc-thread, which would retry load but again would find the
3466       // page missing. Also, the value will be flushed to caches due to the ioctl
3467       // syscall below.
3468       uint8_t ret = thread_pool_counter_--;
3469       // If 'gKernelHasFaultRetry == true' then only the last thread should map the
3470       // zeropage so that the gc-thread can proceed. Otherwise, each thread does
3471       // it and the gc-thread will repeat this fault until thread_pool_counter == 0.
3472       if (!gKernelHasFaultRetry || ret == 1) {
3473         ZeropageIoctl(fault_addr, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
3474       } else {
3475         struct uffdio_range uffd_range;
3476         uffd_range.start = msg.arg.pagefault.address;
3477         uffd_range.len = gPageSize;
3478         CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &uffd_range), 0)
3479             << "ioctl_userfaultfd: wake failed for concurrent-compaction termination page: "
3480             << strerror(errno);
3481       }
3482       break;
3483     }
3484     uint8_t* fault_page = AlignDown(fault_addr, gPageSize);
3485     if (HasAddress(reinterpret_cast<mirror::Object*>(fault_addr))) {
3486       ConcurrentlyProcessMovingPage<kMode>(fault_page, buf, nr_moving_space_used_pages);
3487     } else if (minor_fault_initialized_) {
3488       ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(
3489           fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
3490     } else {
3491       ConcurrentlyProcessLinearAllocPage<kCopyMode>(
3492           fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
3493     }
3494   }
3495 }
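
// Illustrative sketch of the generic userfaultfd read-and-serve loop that
// ConcurrentCompaction() specializes. Each missing-page fault is resolved here
// by installing a zero page with UFFDIO_ZEROPAGE; the real worker instead
// prepares compacted contents and mostly uses UFFDIO_COPY. The helper name,
// parameters and termination condition are assumptions for illustration only.
[[maybe_unused]] static void SketchServeUffdFaults(int uffd, size_t page_size, size_t max_faults) {
  for (size_t served = 0; served < max_faults; served++) {
    struct uffd_msg msg;
    ssize_t nread = read(uffd, &msg, sizeof(msg));
    if (nread != static_cast<ssize_t>(sizeof(msg)) || msg.event != UFFD_EVENT_PAGEFAULT) {
      break;
    }
    struct uffdio_zeropage zero = {};
    zero.range.start = msg.arg.pagefault.address & ~static_cast<uint64_t>(page_size - 1);
    zero.range.len = page_size;
    // EEXIST means another thread raced us and already mapped this page.
    if (ioctl(uffd, UFFDIO_ZEROPAGE, &zero) != 0 && errno != EEXIST) {
      break;
    }
  }
}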
3496 
3497 bool MarkCompact::SigbusHandler(siginfo_t* info) {
3498   class ScopedInProgressCount {
3499    public:
3500     explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3501       // Increment the count only if compaction is not done yet.
3502       SigbusCounterType prev =
3503           collector_->sigbus_in_progress_count_.load(std::memory_order_relaxed);
3504       while ((prev & kSigbusCounterCompactionDoneMask) == 0) {
3505         if (collector_->sigbus_in_progress_count_.compare_exchange_strong(
3506                 prev, prev + 1, std::memory_order_acquire)) {
3507           DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1);
3508           compaction_done_ = false;
3509           return;
3510         }
3511       }
3512       compaction_done_ = true;
3513     }
3514 
3515     bool IsCompactionDone() const {
3516       return compaction_done_;
3517     }
3518 
3519     ~ScopedInProgressCount() {
3520       if (!IsCompactionDone()) {
3521         collector_->sigbus_in_progress_count_.fetch_sub(1, std::memory_order_release);
3522       }
3523     }
3524 
3525    private:
3526     MarkCompact* const collector_;
3527     bool compaction_done_;
3528   };
3529 
3530   DCHECK(use_uffd_sigbus_);
3531   if (info->si_code != BUS_ADRERR) {
3532     // Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be
3533     // handled here.
3534     return false;
3535   }
3536 
3537   ScopedInProgressCount spc(this);
3538   uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), gPageSize);
3539   if (!spc.IsCompactionDone()) {
3540     if (HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) {
3541       Thread* self = Thread::Current();
3542       Locks::mutator_lock_->AssertSharedHeld(self);
3543       size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
3544       if (minor_fault_initialized_) {
3545         ConcurrentlyProcessMovingPage<kMinorFaultMode>(
3546             fault_page, nullptr, nr_moving_space_used_pages);
3547       } else {
3548         ConcurrentlyProcessMovingPage<kCopyMode>(
3549             fault_page, self->GetThreadLocalGcBuffer(), nr_moving_space_used_pages);
3550       }
3551       return true;
3552     } else {
3553       // Find the linear-alloc space containing fault-addr
3554       for (auto& data : linear_alloc_spaces_data_) {
3555         if (data.begin_ <= fault_page && data.end_ > fault_page) {
3556           if (minor_fault_initialized_) {
3557             ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(fault_page, false);
3558           } else {
3559             ConcurrentlyProcessLinearAllocPage<kCopyMode>(fault_page, false);
3560           }
3561           return true;
3562         }
3563       }
3564       // Fault address doesn't belong to either moving-space or linear-alloc.
3565       return false;
3566     }
3567   } else {
3568     // We may spuriously get a SIGBUS fault that was initiated before the
3569     // compaction finished but ends up here. In that case, if the fault
3570     // address is valid then consider it handled.
3571     return HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) ||
3572            linear_alloc_spaces_data_.end() !=
3573                std::find_if(linear_alloc_spaces_data_.begin(),
3574                             linear_alloc_spaces_data_.end(),
3575                             [fault_page](const LinearAllocSpaceData& data) {
3576                               return data.begin_ <= fault_page && data.end_ > fault_page;
3577                             });
3578   }
3579 }
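
// A minimal sketch (all names hypothetical) of the "count with a terminal bit"
// protocol that sigbus_in_progress_count_ and ScopedInProgressCount implement:
// handlers increment the count only while the done-bit is clear, and the GC
// thread sets the bit with fetch_or and then waits for the count to drain.
[[maybe_unused]] static constexpr uint32_t kSketchDoneBit = 1u << 31;

[[maybe_unused]] static bool SketchTryEnter(Atomic<uint32_t>* count) {
  uint32_t prev = count->load(std::memory_order_relaxed);
  while ((prev & kSketchDoneBit) == 0) {
    if (count->compare_exchange_strong(prev, prev + 1, std::memory_order_acquire)) {
      return true;  // Entered; the caller must fetch_sub(1, release) when done.
    }
  }
  return false;  // The terminal bit is set; the handler must not participate.
}

[[maybe_unused]] static void SketchSetDoneAndDrain(Atomic<uint32_t>* count) {
  uint32_t value = count->fetch_or(kSketchDoneBit, std::memory_order_acq_rel);
  // Spin (a real implementation would back off) until in-flight handlers exit.
  while ((value & ~kSketchDoneBit) != 0) {
    value = count->load(std::memory_order_acquire);
  }
}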
3580 
3581 template <int kMode>
3582 void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page,
3583                                                 uint8_t* buf,
3584                                                 size_t nr_moving_space_used_pages) {
3585   // TODO: add a class whose scoped destructor records that a page has already
3586   // been mapped. This helps avoid a zero-page ioctl in the gc-thread before
3587   // unregistering the unused space.
3588   class ScopedInProgressCount {
3589    public:
3590     explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3591       collector_->compaction_in_progress_count_.fetch_add(1, std::memory_order_relaxed);
3592     }
3593 
3594     ~ScopedInProgressCount() {
3595       collector_->compaction_in_progress_count_.fetch_sub(1, std::memory_order_relaxed);
3596     }
3597 
3598    private:
3599     MarkCompact* collector_;
3600   };
3601 
3602   Thread* self = Thread::Current();
3603   uint8_t* unused_space_begin =
3604       bump_pointer_space_->Begin() + nr_moving_space_used_pages * gPageSize;
3605   DCHECK(IsAlignedParam(unused_space_begin, gPageSize));
3606   DCHECK(kMode == kCopyMode || fault_page < unused_space_begin);
3607   if (kMode == kCopyMode && fault_page >= unused_space_begin) {
3608     // There is a race which allows more than one thread to install a
3609     // zero-page. But we can tolerate that. So absorb the EEXIST returned by
3610     // the ioctl and move on.
3611     ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true);
3612     return;
3613   }
3614   size_t page_idx = DivideByPageSize(fault_page - bump_pointer_space_->Begin());
3615   DCHECK_LT(page_idx, moving_first_objs_count_ + black_page_count_);
3616   mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr();
3617   if (first_obj == nullptr) {
3618     // Install zero-page in the entire remaining tlab to avoid multiple ioctl invocations.
3619     uint8_t* end = AlignDown(self->GetTlabEnd(), gPageSize);
3620     if (fault_page < self->GetTlabStart() || fault_page >= end) {
3621       end = fault_page + gPageSize;
3622     }
3623     size_t end_idx = page_idx + DivideByPageSize(end - fault_page);
3624     size_t length = 0;
3625     for (size_t idx = page_idx; idx < end_idx; idx++, length += gPageSize) {
3626       uint32_t cur_state = moving_pages_status_[idx].load(std::memory_order_acquire);
3627       if (cur_state != static_cast<uint8_t>(PageState::kUnprocessed)) {
3628         DCHECK_EQ(cur_state, static_cast<uint8_t>(PageState::kProcessedAndMapped));
3629         break;
3630       }
3631     }
3632     if (length > 0) {
3633       length =
3634           ZeropageIoctl(fault_page, length, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true);
3635       for (size_t len = 0, idx = page_idx; len < length; idx++, len += gPageSize) {
3636         moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3637                                         std::memory_order_release);
3638       }
3639     }
3640     return;
3641   }
3642 
3643   uint32_t raw_state = moving_pages_status_[page_idx].load(
3644       use_uffd_sigbus_ ? std::memory_order_acquire : std::memory_order_relaxed);
3645   uint32_t backoff_count = 0;
3646   PageState state;
3647   while (true) {
3648     state = GetPageStateFromWord(raw_state);
3649     if (state == PageState::kProcessing || state == PageState::kMutatorProcessing ||
3650         state == PageState::kProcessingAndMapping || state == PageState::kProcessedAndMapping) {
3651       if (!use_uffd_sigbus_) {
3652         break;
3653       }
3654       // Wait for the page to be mapped (by gc-thread or some mutator) before returning.
3655       // The wait is not expected to be long as the read state indicates that the other
3656       // thread is actively working on the page.
3657       BackOff(backoff_count++);
3658       raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3659     } else if (state == PageState::kProcessedAndMapped) {
3660       // Nothing to do.
3661       break;
3662     } else {
3663       // The increment to the in-progress counter must be done before updating
3664       // the page's state. Otherwise, we will end up leaving a window wherein
3665       // the GC-thread could observe that no worker is working on compaction
3666       // and could end up unregistering the moving space from userfaultfd.
3667       ScopedInProgressCount spc(this);
3668       // Acquire order to ensure we don't start writing to shadow map, which is
3669       // shared, before the CAS is successful. Release order to ensure that the
3670       // increment to compaction_in_progress_count_ above is not re-ordered
3671       // after the CAS.
3672       if (state == PageState::kUnprocessed &&
3673           moving_pages_status_[page_idx].compare_exchange_strong(
3674               raw_state,
3675               static_cast<uint8_t>(PageState::kMutatorProcessing),
3676               std::memory_order_acq_rel)) {
3677         if (kMode == kMinorFaultMode) {
3678           DCHECK_EQ(buf, nullptr);
3679           buf = shadow_to_space_map_.Begin() + page_idx * gPageSize;
3680         } else if (UNLIKELY(buf == nullptr)) {
3681           DCHECK_EQ(kMode, kCopyMode);
3682           uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed);
3683           // The buffer-map is one page bigger as the first buffer is used by GC-thread.
3684           CHECK_LE(idx, kMutatorCompactionBufferCount);
3685           buf = compaction_buffers_map_.Begin() + idx * gPageSize;
3686           DCHECK(compaction_buffers_map_.HasAddress(buf));
3687           self->SetThreadLocalGcBuffer(buf);
3688         }
3689 
3690         if (fault_page < post_compact_end_) {
3691           // The page has to be compacted.
3692           CompactPage(
3693               first_obj, pre_compact_offset_moving_space_[page_idx], buf, kMode == kCopyMode);
3694         } else {
3695           DCHECK_NE(first_obj, nullptr);
3696           DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u);
3697           uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_);
3698           uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx];
3699           mirror::Object* next_page_first_obj = nullptr;
3700           if (page_idx + 1 < moving_first_objs_count_ + black_page_count_) {
3701             next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr();
3702           }
3703           DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
3704           SlideBlackPage(first_obj,
3705                          next_page_first_obj,
3706                          first_chunk_size,
3707                          pre_compact_page,
3708                          buf,
3709                          kMode == kCopyMode);
3710         }
3711         // Nobody else would simultaneously modify this page's state so an
3712         // atomic store is sufficient. Use 'release' order to guarantee that
3713         // loads/stores to the page are finished before this store. Since the
3714         // mutator used its own buffer for the processing, there is no reason to
3715         // put its index in the status of the page. Also, the mutator is going
3716         // to immediately map the page, so that info is not needed.
3717         moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapping),
3718                                              std::memory_order_release);
3719         if (kMode == kCopyMode) {
3720           CopyIoctl(fault_page, buf, gPageSize, /*return_on_contention=*/false);
3721           // Store is sufficient as no other thread modifies the status at this stage.
3722           moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3723                                                std::memory_order_release);
3724           break;
3725         } else {
3726           // We don't support minor-fault feature anymore.
3727           UNREACHABLE();
3728         }
3729       }
3730       state = GetPageStateFromWord(raw_state);
3731       if (state == PageState::kProcessed) {
3732         size_t arr_len = moving_first_objs_count_ + black_page_count_;
3733         // The page is processed but not mapped. We should map it. The release
3734         // order used in MapMovingSpacePages will ensure that the increment to
3735         // compaction_in_progress_count_ is done first.
3736         if (MapMovingSpacePages(page_idx,
3737                                 arr_len,
3738                                 /*from_fault=*/true,
3739                                 /*return_on_contention=*/false) > 0) {
3740           break;
3741         }
3742         raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3743       }
3744     }
3745   }
3746 }
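
// Illustrative sketch of the UFFDIO_COPY step that CopyIoctl() wraps for the
// paths above: page contents are produced in a private buffer and then
// atomically installed at the faulting address inside the registered range.
// The helper name and the simplified error handling are assumptions.
[[maybe_unused]] static bool SketchUffdCopyPage(int uffd, void* dst, void* src, size_t page_size) {
  struct uffdio_copy copy = {};
  copy.dst = reinterpret_cast<uintptr_t>(dst);
  copy.src = reinterpret_cast<uintptr_t>(src);
  copy.len = page_size;
  copy.mode = 0;  // The collector may additionally use trylock modes when supported.
  if (ioctl(uffd, UFFDIO_COPY, &copy) != 0) {
    // EEXIST means a racing thread already mapped the page, which is benign.
    return errno == EEXIST;
  }
  return copy.copy == static_cast<int64_t>(page_size);
}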
3747 
3748 bool MarkCompact::MapUpdatedLinearAllocPages(uint8_t* start_page,
3749                                              uint8_t* start_shadow_page,
3750                                              Atomic<PageState>* state,
3751                                              size_t length,
3752                                              bool free_pages,
3753                                              bool single_ioctl) {
3754   DCHECK(!minor_fault_initialized_);
3755   DCHECK_ALIGNED_PARAM(length, gPageSize);
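  // Remember the start of the whole range: 'state', 'length' and
  // 'start_shadow_page' are advanced in the loop below, but the shadow pages of
  // the entire range may need to be released (and re-checked) at the end.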
3756   Atomic<PageState>* madv_state = state;
3757   size_t madv_len = length;
3758   uint8_t* madv_start = start_shadow_page;
3759   bool check_state_for_madv = false;
3760   uint8_t* end_page = start_page + length;
3761   while (start_page < end_page) {
3762     size_t map_len = 0;
3763     // Find a contiguous range of pages that we can map in single ioctl.
3764     for (Atomic<PageState>* cur_state = state;
3765          map_len < length && cur_state->load(std::memory_order_acquire) == PageState::kProcessed;
3766          map_len += gPageSize, cur_state++) {
3767       // No body.
3768     }
3769 
3770     if (map_len == 0) {
3771       if (single_ioctl) {
3772         return state->load(std::memory_order_relaxed) == PageState::kProcessedAndMapped;
3773       }
3774       // Skip all the pages that this thread can't map.
3775       while (length > 0) {
3776         PageState s = state->load(std::memory_order_relaxed);
3777         if (s == PageState::kProcessed) {
3778           break;
3779         }
3780         // If we find any page which is being processed or mapped (only possible by a mutator(s))
3781         // then we need to re-check the page-state and, if needed, wait for the state to change
3782         // to 'mapped', before the shadow pages are reclaimed.
3783         check_state_for_madv |= s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped;
3784         state++;
3785         length -= gPageSize;
3786         start_shadow_page += gPageSize;
3787         start_page += gPageSize;
3788       }
3789     } else {
3790       map_len = CopyIoctl(start_page,
3791                           start_shadow_page,
3792                           map_len,
3793                           /*return_on_contention=*/false);
3794       DCHECK_NE(map_len, 0u);
3795       if (use_uffd_sigbus_) {
3796         // Declare that the pages are ready to be accessed. Store is sufficient
3797         // as any thread will be storing the same value.
3798         for (size_t l = 0; l < map_len; l += gPageSize, state++) {
3799           PageState s = state->load(std::memory_order_relaxed);
3800           DCHECK(s == PageState::kProcessed || s == PageState::kProcessedAndMapped)
3801               << "state:" << s;
3802           state->store(PageState::kProcessedAndMapped, std::memory_order_release);
3803         }
3804       } else {
3805         state += DivideByPageSize(map_len);
3806       }
3807       if (single_ioctl) {
3808         break;
3809       }
3810       start_page += map_len;
3811       start_shadow_page += map_len;
3812       length -= map_len;
3813       // state is already updated above.
3814     }
3815   }
3816   if (free_pages) {
3817     if (check_state_for_madv) {
3818       // Wait until all the pages are mapped before releasing them. This check is
3819       // needed only if some mutators were found to be concurrently mapping pages earlier.
3820       for (size_t l = 0; l < madv_len; l += gPageSize, madv_state++) {
3821         uint32_t backoff_count = 0;
3822         PageState s = madv_state->load(std::memory_order_relaxed);
3823         while (s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped) {
3824           BackOff(backoff_count++);
3825           s = madv_state->load(std::memory_order_relaxed);
3826         }
3827       }
3828     }
3829     ZeroAndReleaseMemory(madv_start, madv_len);
3830   }
3831   return true;
3832 }
3833 
3834 template <int kMode>
3835 void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault) {
3836   DCHECK(!is_minor_fault || kMode == kMinorFaultMode);
3837   auto arena_iter = linear_alloc_arenas_.end();
3838   {
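    // Find the arena containing fault_page: upper_bound() returns the first
    // arena that begins after fault_page, so its predecessor (if any) is the
    // only candidate that can contain the faulting address.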
3839     TrackedArena temp_arena(fault_page);
3840     arena_iter = linear_alloc_arenas_.upper_bound(&temp_arena);
3841     arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter)
3842                                                             : linear_alloc_arenas_.end();
3843   }
3844   // Unlike ProcessLinearAlloc(), we don't need to hold arena-pool's lock here
3845   // because a thread trying to access the page and as a result causing this
3846   // userfault confirms that nobody can delete the corresponding arena and
3847   // release its pages.
3848   // NOTE: A memory range may get recycled several times during a compaction
3849   // cycle, thereby potentially causing a userfault on the same page several
3850   // times. That's not a problem, as all of them (except possibly the first one)
3851   // only require us to map a zero-page, which we do without updating
3852   // the 'state_arr'.
3853   if (arena_iter == linear_alloc_arenas_.end() ||
3854       arena_iter->first->IsWaitingForDeletion() ||
3855       arena_iter->second <= fault_page) {
3856     // Fault page isn't in any of the arenas that existed before we started
3857     // compaction. So map zeropage and return.
3858     ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false);
3859   } else {
3860     // Find the linear-alloc space containing fault-page
3861     LinearAllocSpaceData* space_data = nullptr;
3862     for (auto& data : linear_alloc_spaces_data_) {
3863       if (data.begin_ <= fault_page && fault_page < data.end_) {
3864         space_data = &data;
3865         break;
3866       }
3867     }
3868     DCHECK_NE(space_data, nullptr);
3869     ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_;
3870     size_t page_idx = DivideByPageSize(fault_page - space_data->begin_);
3871     Atomic<PageState>* state_arr =
3872         reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
3873     PageState state = state_arr[page_idx].load(use_uffd_sigbus_ ? std::memory_order_acquire :
3874                                                                   std::memory_order_relaxed);
3875     uint32_t backoff_count = 0;
3876     while (true) {
3877       switch (state) {
3878         case PageState::kUnprocessed: {
3879           // Acquire order to ensure we don't start writing to shadow map, which is
3880           // shared, before the CAS is successful.
3881           if (state_arr[page_idx].compare_exchange_strong(
3882                   state, PageState::kProcessing, std::memory_order_acquire)) {
3883             if (kMode == kCopyMode || is_minor_fault) {
3884               LinearAllocPageUpdater updater(this);
3885               uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page);
3886               // null first_obj indicates that it's a page from arena for
3887               // intern-table/class-table. So first object isn't required.
3888               if (first_obj != nullptr) {
3889                 updater.MultiObjectArena(fault_page + diff, first_obj + diff);
3890               } else {
3891                 updater.SingleObjectArena(fault_page + diff, gPageSize);
3892               }
3893               if (kMode == kCopyMode) {
3894                 if (updater.WasLastPageTouched()) {
3895                   state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
3896                   state = PageState::kProcessed;
3897                   continue;
3898                 } else {
3899                   // If the page wasn't touched, then it means it is empty and
3900                   // If the page wasn't touched, then it is empty and most
3901                   // likely not present on the shadow side. Furthermore, since
3902                   // the shadow is also userfaultfd-registered, the copy ioctl
3903                   // fails because the copy-from-user in the kernel would itself
3904                   // cause a userfault. Instead, just map a zeropage, which is
3905                   // not only correct but also efficient as it avoids an
3906                   // unnecessary memcpy in the kernel.
3907                       fault_page, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
3908                   state_arr[page_idx].store(PageState::kProcessedAndMapped,
3909                                             std::memory_order_release);
3910                   return;
3911                 }
3912               }
3913             } else {
3914               // Don't touch the page in this case (there is no reason to do so
3915               // anyways) as it would mean reading from first_obj, which could be on
3916               // another missing page and hence may cause this thread to block, leading
3917               // to deadlocks.
3918               // Force read the page if it is missing so that a zeropage gets mapped on
3919               // the shadow map and then CONTINUE ioctl will map it on linear-alloc.
3920               ForceRead(fault_page + diff);
3921             }
3922             MapProcessedPages</*kFirstPageMapping=*/true>(
3923                 fault_page, state_arr, page_idx, space_data->page_status_map_.Size());
3924             return;
3925           }
3926         }
3927           continue;
3928         case PageState::kProcessed:
3929           // Map as many pages as possible in a single ioctl, without spending
3930           // time freeing pages.
3931           if (MapUpdatedLinearAllocPages(fault_page,
3932                                          fault_page + diff,
3933                                          state_arr + page_idx,
3934                                          space_data->end_ - fault_page,
3935                                          /*free_pages=*/false,
3936                                          /*single_ioctl=*/true)) {
3937             return;
3938           }
3939           // fault_page was not mapped by this thread (some other thread claimed
3940           // it). Wait for it to be mapped before returning.
3941           FALLTHROUGH_INTENDED;
3942         case PageState::kProcessing:
3943         case PageState::kProcessingAndMapping:
3944         case PageState::kProcessedAndMapping:
3945           if (use_uffd_sigbus_) {
3946             // Wait for the page to be mapped before returning.
3947             BackOff(backoff_count++);
3948             state = state_arr[page_idx].load(std::memory_order_acquire);
3949             continue;
3950           }
3951           return;
3952         case PageState::kMutatorProcessing:
3953           LOG(FATAL) << "Unreachable";
3954           UNREACHABLE();
3955         case PageState::kProcessedAndMapped:
3956           // Somebody else took care of the page.
3957           return;
3958       }
3959       break;
3960     }
3961 
3962     DCHECK_EQ(kMode, kMinorFaultMode);
3963     DCHECK_EQ(state, PageState::kProcessed);
3964     if (!is_minor_fault) {
3965       // Force read the page if it is missing so that a zeropage gets mapped on
3966       // the shadow map and then CONTINUE ioctl will map it on linear-alloc.
3967       ForceRead(fault_page + diff);
3968     }
3969     MapProcessedPages</*kFirstPageMapping=*/false>(
3970         fault_page, state_arr, page_idx, space_data->page_status_map_.Size());
3971   }
3972 }
3973 
3974 void MarkCompact::ProcessLinearAlloc() {
3975   GcVisitedArenaPool* arena_pool =
3976       static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
3977   DCHECK_EQ(thread_running_gc_, Thread::Current());
3978   uint8_t* unmapped_range_start = nullptr;
3979   uint8_t* unmapped_range_end = nullptr;
3980   // Pointer to the linear-alloc space containing the current arena in the loop
3981   // below. Also helps in ensuring that two arenas, which are contiguous in
3982   // address space but are from different linear-alloc spaces, are not coalesced
3983   // into one range for mapping purpose.
3984   LinearAllocSpaceData* space_data = nullptr;
3985   Atomic<PageState>* state_arr = nullptr;
3986   ptrdiff_t diff = 0;
3987 
3988   auto map_pages = [&]() {
3989     DCHECK_NE(diff, 0);
3990     DCHECK_NE(space_data, nullptr);
3991     DCHECK_GE(unmapped_range_start, space_data->begin_);
3992     DCHECK_LT(unmapped_range_start, space_data->end_);
3993     DCHECK_GT(unmapped_range_end, space_data->begin_);
3994     DCHECK_LE(unmapped_range_end, space_data->end_);
3995     DCHECK_LT(unmapped_range_start, unmapped_range_end);
3996     DCHECK_ALIGNED_PARAM(unmapped_range_end - unmapped_range_start, gPageSize);
3997     size_t page_idx = DivideByPageSize(unmapped_range_start - space_data->begin_);
3998     MapUpdatedLinearAllocPages(unmapped_range_start,
3999                                unmapped_range_start + diff,
4000                                state_arr + page_idx,
4001                                unmapped_range_end - unmapped_range_start,
4002                                /*free_pages=*/true,
4003                                /*single_ioctl=*/false);
4004   };
4005   for (auto& pair : linear_alloc_arenas_) {
4006     const TrackedArena* arena = pair.first;
4007     size_t arena_size = arena->Size();
4008     uint8_t* arena_begin = arena->Begin();
4009     // linear_alloc_arenas_ is sorted on arena-begin. So we will get all arenas
4010     // in that order.
4011     DCHECK_LE(unmapped_range_end, arena_begin);
4012     DCHECK(space_data == nullptr || arena_begin > space_data->begin_)
4013         << "space-begin:" << static_cast<void*>(space_data->begin_)
4014         << " arena-begin:" << static_cast<void*>(arena_begin);
4015     if (space_data == nullptr || space_data->end_ <= arena_begin) {
4016       // Map the processed arenas as we are switching to another space.
4017       if (space_data != nullptr && unmapped_range_end != nullptr) {
4018         map_pages();
4019         unmapped_range_end = nullptr;
4020       }
4021       // Find the linear-alloc space containing the arena
4022       LinearAllocSpaceData* curr_space_data = space_data;
4023       for (auto& data : linear_alloc_spaces_data_) {
4024         if (data.begin_ <= arena_begin && arena_begin < data.end_) {
4025           // Since arenas are sorted, the next space should be higher in address
4026           // order than the current one.
4027           DCHECK(space_data == nullptr || data.begin_ >= space_data->end_);
4028           diff = data.shadow_.Begin() - data.begin_;
4029           state_arr = reinterpret_cast<Atomic<PageState>*>(data.page_status_map_.Begin());
4030           space_data = &data;
4031           break;
4032         }
4033       }
4034       CHECK_NE(space_data, curr_space_data)
4035           << "Couldn't find space for arena-begin:" << static_cast<void*>(arena_begin);
4036     }
4037     // Map the processed arenas if we found a hole within the current space.
4038     if (unmapped_range_end != nullptr && unmapped_range_end < arena_begin) {
4039       map_pages();
4040       unmapped_range_end = nullptr;
4041     }
4042     if (unmapped_range_end == nullptr) {
4043       unmapped_range_start = unmapped_range_end = arena_begin;
4044     }
4045     DCHECK_NE(unmapped_range_start, nullptr);
4046     // It's ok to include all arenas in the unmapped range. Since the
4047     // corresponding state bytes will be kUnprocessed, we will skip calling
4048     // ioctl and madvise on arenas which are waiting to be deleted.
4049     unmapped_range_end += arena_size;
4050     {
4051       // Acquire arena-pool's lock (in shared-mode) so that the arena being updated
4052       // does not get deleted at the same time. If this critical section is too
4053       // long and impacts mutator response time, then we can get rid of this lock
4054       // by holding onto the memory ranges of all arenas deleted since the
4055       // compaction pause until compaction finishes.
4056       ReaderMutexLock rmu(thread_running_gc_, arena_pool->GetLock());
4057       // If any arenas were freed since compaction pause then skip them from
4058       // visiting.
4059       if (arena->IsWaitingForDeletion()) {
4060         continue;
4061       }
4062       uint8_t* last_byte = pair.second;
4063       DCHECK_ALIGNED_PARAM(last_byte, gPageSize);
4064       auto visitor = [space_data, last_byte, diff, this, state_arr](
4065                          uint8_t* page_begin,
4066                          uint8_t* first_obj,
4067                          size_t page_size) REQUIRES_SHARED(Locks::mutator_lock_) {
4068         // No need to process pages past last_byte as they already have updated
4069         // gc-roots, if any.
4070         if (page_begin >= last_byte) {
4071           return;
4072         }
4073         LinearAllocPageUpdater updater(this);
4074         size_t page_idx = DivideByPageSize(page_begin - space_data->begin_);
4075         DCHECK_LT(page_idx, space_data->page_status_map_.Size());
4076         PageState expected_state = PageState::kUnprocessed;
4077         // Acquire order to ensure that we don't start accessing the shadow page,
4078         // which is shared with other threads, prior to CAS. Also, for same
4079         // reason, we used 'release' order for changing the state to 'processed'.
4080         if (state_arr[page_idx].compare_exchange_strong(
4081                 expected_state, PageState::kProcessing, std::memory_order_acquire)) {
4082           // null first_obj indicates that it's a page from arena for
4083           // intern-table/class-table. So first object isn't required.
4084           if (first_obj != nullptr) {
4085             updater.MultiObjectArena(page_begin + diff, first_obj + diff);
4086           } else {
4087             DCHECK_EQ(page_size, gPageSize);
4088             updater.SingleObjectArena(page_begin + diff, page_size);
4089           }
4090           expected_state = PageState::kProcessing;
4091           if (!minor_fault_initialized_) {
4092             // Store is sufficient as no other thread could be modifying it. Use
4093             // release order to ensure that the writes to shadow page are
4094             // committed to memory before.
4095             if (updater.WasLastPageTouched()) {
4096               state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
4097             } else {
4098               // See comment in ConcurrentlyProcessLinearAllocPage() with same situation.
4099               ZeropageIoctl(
4100                   page_begin, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
4101               // Ioctl will act as release fence.
4102               state_arr[page_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
4103             }
4104           } else if (!state_arr[page_idx].compare_exchange_strong(
4105                          expected_state, PageState::kProcessed, std::memory_order_release)) {
4106             DCHECK_EQ(expected_state, PageState::kProcessingAndMapping);
4107             // Force read in case the page was missing and updater didn't touch it
4108             // as there was nothing to do. This will ensure that a zeropage is
4109             // faulted on the shadow map.
4110             ForceRead(page_begin + diff);
4111             MapProcessedPages</*kFirstPageMapping=*/true>(
4112                 page_begin, state_arr, page_idx, space_data->page_status_map_.Size());
4113           }
4114         }
4115       };
4116 
4117       arena->VisitRoots(visitor);
4118     }
4119   }
4120   if (unmapped_range_end > unmapped_range_start) {
4121     // Map remaining pages.
4122     map_pages();
4123   }
4124 }
4125 
4126 void MarkCompact::RegisterUffd(void* addr, size_t size, int mode) {
4127   DCHECK(IsValidFd(uffd_));
4128   struct uffdio_register uffd_register;
4129   uffd_register.range.start = reinterpret_cast<uintptr_t>(addr);
4130   uffd_register.range.len = size;
4131   uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
4132   if (mode == kMinorFaultMode) {
4133     uffd_register.mode |= UFFDIO_REGISTER_MODE_MINOR;
4134   }
4135   CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
4136       << "ioctl_userfaultfd: register failed: " << strerror(errno)
4137       << ". start:" << static_cast<void*>(addr) << " len:" << PrettySize(size);
4138 }
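
// Illustrative sketch (not part of the collector) of the userfaultfd setup
// that RegisterUffd() builds upon: create the descriptor, perform the
// UFFDIO_API handshake, and register a range for missing-page faults. The
// helper name and the empty feature set are assumptions; a real client may
// also request features such as UFFD_FEATURE_SIGBUS at handshake time.
[[maybe_unused]] static int SketchCreateAndRegisterUffd(void* addr, size_t len) {
  int uffd = static_cast<int>(syscall(__NR_userfaultfd, O_CLOEXEC));
  if (uffd < 0) {
    return -1;
  }
  struct uffdio_api api = {};
  api.api = UFFD_API;
  api.features = 0;
  if (ioctl(uffd, UFFDIO_API, &api) != 0) {
    close(uffd);
    return -1;
  }
  struct uffdio_register reg = {};
  reg.range.start = reinterpret_cast<uintptr_t>(addr);
  reg.range.len = len;
  reg.mode = UFFDIO_REGISTER_MODE_MISSING;
  if (ioctl(uffd, UFFDIO_REGISTER, &reg) != 0) {
    close(uffd);
    return -1;
  }
  return uffd;
}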
4139 
4140 // TODO: sometime we may want to tolerate certain error conditions (like ENOMEM
4141 // when we unregister the unused portion of the moving-space). Implement support
4142 // for that.
4143 void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) {
4144   DCHECK(IsValidFd(uffd_));
4145   struct uffdio_range range;
4146   range.start = reinterpret_cast<uintptr_t>(start);
4147   range.len = len;
4148   CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &range), 0)
4149       << "ioctl_userfaultfd: unregister failed: " << strerror(errno)
4150       << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
4151   // Due to an oversight in the kernel implementation of 'unregister', the
4152   // waiting threads are woken up only for copy uffds. Therefore, for now, we
4153   // have to explicitly wake up the threads in minor-fault case.
4154   // TODO: The fix in the kernel is being worked on. Once the kernel version
4155   // containing the fix is known, make it conditional on that as well.
4156   if (minor_fault_initialized_) {
4157     CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &range), 0)
4158         << "ioctl_userfaultfd: wake failed: " << strerror(errno)
4159         << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
4160   }
4161 }
4162 
4163 void MarkCompact::CompactionPhase() {
4164   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4165   {
4166     int32_t freed_bytes = black_objs_slide_diff_;
4167     bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
4168     RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
4169   }
4170 
4171   if (CanCompactMovingSpaceWithMinorFault()) {
4172     CompactMovingSpace<kMinorFaultMode>(/*page=*/nullptr);
4173   } else {
4174     CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin());
4175   }
4176 
4177   ProcessLinearAlloc();
4178 
4179   if (use_uffd_sigbus_) {
4180     // Set compaction-done bit so that no new mutator threads start compaction
4181     // process in the SIGBUS handler.
4182     SigbusCounterType count = sigbus_in_progress_count_.fetch_or(kSigbusCounterCompactionDoneMask,
4183                                                                  std::memory_order_acq_rel);
4184     // Wait for SIGBUS handlers already in play.
4185     for (uint32_t i = 0; count > 0; i++) {
4186       BackOff(i);
4187       count = sigbus_in_progress_count_.load(std::memory_order_acquire);
4188       count &= ~kSigbusCounterCompactionDoneMask;
4189     }
4190   } else {
4191     DCHECK(IsAlignedParam(conc_compaction_termination_page_, gPageSize));
4192     // We will only iterate once if gKernelHasFaultRetry is true.
4193     do {
4194       // madvise the page so that we can get userfaults on it.
4195       ZeroAndReleaseMemory(conc_compaction_termination_page_, gPageSize);
4196       // The following load triggers 'special' userfaults. When received by the
4197       // thread-pool workers, they will exit out of the compaction task. This fault
4198       // happens because we madvised the page.
4199       ForceRead(conc_compaction_termination_page_);
4200     } while (thread_pool_counter_ > 0);
4201   }
4202   // Unregister linear-alloc spaces
4203   for (auto& data : linear_alloc_spaces_data_) {
4204     DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size()));
4205     UnregisterUffd(data.begin_, data.shadow_.Size());
4206     // madvise linear-alloc's page-status array. Note that we don't need to
4207     // madvise the shadow-map as the pages from it were reclaimed in
4208     // ProcessLinearAlloc() after arenas were mapped.
4209     data.page_status_map_.MadviseDontNeedAndZero();
4210     if (minor_fault_initialized_) {
4211       DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_NONE), 0)
4212           << "mprotect failed: " << strerror(errno);
4213     }
4214   }
4215 
4216   // Make sure no mutator is reading from the from-space before unregistering
4217   // userfaultfd from moving-space and then zapping from-space. The mutator
4218   // and GC may race to set a page state to processing or further along. The two
4219   // attempts are ordered. If the collector wins, then the mutator will see that
4220   // and not access the from-space page. If the mutator wins, then the
4221   // compaction_in_progress_count_ increment by the mutator happens-before the test
4222   // here, and we will not see a zero value until the mutator has completed.
4223   for (uint32_t i = 0; compaction_in_progress_count_.load(std::memory_order_acquire) > 0; i++) {
4224     BackOff(i);
4225   }
4226   size_t moving_space_size = bump_pointer_space_->Capacity();
4227   size_t used_size = (moving_first_objs_count_ + black_page_count_) * gPageSize;
4228   if (used_size > 0) {
4229     UnregisterUffd(bump_pointer_space_->Begin(), used_size);
4230   }
4231   // Release all of the memory taken by moving-space's from-map
4232   if (minor_fault_initialized_) {
4233     if (IsValidFd(moving_from_space_fd_)) {
4234       // A strange behavior is observed wherein, between GC cycles, the from-space's
4235       // first page is accessed. But the memfd that is mapped on the from-space is
4236       // used on the to-space in the next GC cycle, causing issues with userfaultfd
4237       // as the page isn't missing. A possible reason for this could be prefetches. The
4238       // mprotect ensures that such accesses don't succeed.
4239       int ret = mprotect(from_space_begin_, moving_space_size, PROT_NONE);
4240       CHECK_EQ(ret, 0) << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
4241       // madvise(MADV_REMOVE) needs PROT_WRITE. Use fallocate() instead, which
4242       // does the same thing.
4243       ret = fallocate(moving_from_space_fd_,
4244                       FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
4245                       /*offset=*/0,
4246                       moving_space_size);
4247       CHECK_EQ(ret, 0) << "fallocate for from-space failed: " << strerror(errno);
4248     } else {
4249       // We don't have a valid fd, so use madvise(MADV_REMOVE) instead. mprotect
4250       // is not required in this case as we create fresh
4251       // MAP_SHARED+MAP_ANONYMOUS mapping in each GC cycle.
4252       int ret = madvise(from_space_begin_, moving_space_size, MADV_REMOVE);
4253       CHECK_EQ(ret, 0) << "madvise(MADV_REMOVE) failed for from-space map:" << strerror(errno);
4254     }
4255   } else {
4256     from_space_map_.MadviseDontNeedAndZero();
4257   }
4258   // mprotect(PROT_NONE) all maps except to-space in debug-mode to catch any unexpected accesses.
4259   if (shadow_to_space_map_.IsValid()) {
4260     DCHECK_EQ(mprotect(shadow_to_space_map_.Begin(), shadow_to_space_map_.Size(), PROT_NONE), 0)
4261         << "mprotect(PROT_NONE) for shadow-map failed:" << strerror(errno);
4262   }
4263   if (!IsValidFd(moving_from_space_fd_)) {
4264     // The other case is already mprotected above.
4265     DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_NONE), 0)
4266         << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
4267   }
4268 
4269   if (!use_uffd_sigbus_) {
4270     heap_->GetThreadPool()->StopWorkers(thread_running_gc_);
4271   }
4272 }
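
// Illustrative sketch of the two ways the code above releases from-space
// memory: punch a hole in the backing memfd when a valid fd exists (this works
// even while the mapping is PROT_NONE), otherwise madvise(MADV_REMOVE) the
// shared anonymous mapping directly. Helper name and parameters are assumptions.
[[maybe_unused]] static bool SketchReleaseRange(int fd, void* addr, size_t len) {
  if (fd >= 0) {
    return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                     /*offset=*/0, static_cast<off_t>(len)) == 0;
  }
  return madvise(addr, len, MADV_REMOVE) == 0;
}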
4273 
4274 template <size_t kBufferSize>
4275 class MarkCompact::ThreadRootsVisitor : public RootVisitor {
4276  public:
4277   explicit ThreadRootsVisitor(MarkCompact* mark_compact, Thread* const self)
4278         : mark_compact_(mark_compact), self_(self) {}
4279 
4280   ~ThreadRootsVisitor() {
4281     Flush();
4282   }
4283 
4284   void VisitRoots(mirror::Object*** roots,
4285                   size_t count,
4286                   [[maybe_unused]] const RootInfo& info) override
4287       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
4288     for (size_t i = 0; i < count; i++) {
4289       mirror::Object* obj = *roots[i];
4290       if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
4291         Push(obj);
4292       }
4293     }
4294   }
4295 
4296   void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
4297                   size_t count,
4298                   [[maybe_unused]] const RootInfo& info) override
4299       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
4300     for (size_t i = 0; i < count; i++) {
4301       mirror::Object* obj = roots[i]->AsMirrorPtr();
4302       if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
4303         Push(obj);
4304       }
4305     }
4306   }
4307 
4308  private:
4309   void Flush() REQUIRES_SHARED(Locks::mutator_lock_)
4310                REQUIRES(Locks::heap_bitmap_lock_) {
4311     StackReference<mirror::Object>* start;
4312     StackReference<mirror::Object>* end;
4313     {
4314       MutexLock mu(self_, mark_compact_->lock_);
4315       // Loop here because even after expanding once it may not be sufficient to
4316       // accommodate all references. It's almost impossible, but there is no harm
4317       // in implementing it this way.
4318       while (!mark_compact_->mark_stack_->BumpBack(idx_, &start, &end)) {
4319         mark_compact_->ExpandMarkStack();
4320       }
4321     }
4322     while (idx_ > 0) {
4323       *start++ = roots_[--idx_];
4324     }
4325     DCHECK_EQ(start, end);
4326   }
4327 
4328   void Push(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
4329                                  REQUIRES(Locks::heap_bitmap_lock_) {
4330     if (UNLIKELY(idx_ >= kBufferSize)) {
4331       Flush();
4332     }
4333     roots_[idx_++].Assign(obj);
4334   }
4335 
4336   StackReference<mirror::Object> roots_[kBufferSize];
4337   size_t idx_ = 0;
4338   MarkCompact* const mark_compact_;
4339   Thread* const self_;
4340 };
4341 
4342 class MarkCompact::CheckpointMarkThreadRoots : public Closure {
4343  public:
4344   explicit CheckpointMarkThreadRoots(MarkCompact* mark_compact) : mark_compact_(mark_compact) {}
4345 
4346   void Run(Thread* thread) override NO_THREAD_SAFETY_ANALYSIS {
4347     ScopedTrace trace("Marking thread roots");
4348     // Note: self is not necessarily equal to thread since thread may be
4349     // suspended.
4350     Thread* const self = Thread::Current();
4351     CHECK(thread == self
4352           || thread->IsSuspended()
4353           || thread->GetState() == ThreadState::kWaitingPerformingGc)
4354         << thread->GetState() << " thread " << thread << " self " << self;
4355     {
4356       ThreadRootsVisitor</*kBufferSize*/ 20> visitor(mark_compact_, self);
4357       thread->VisitRoots(&visitor, kVisitRootFlagAllRoots);
4358     }
4359     // Clear page-buffer to prepare for compaction phase.
4360     thread->SetThreadLocalGcBuffer(nullptr);
4361 
4362     // If thread is a running mutator, then act on behalf of the garbage
4363     // collector. See the code in ThreadList::RunCheckpoint.
4364     mark_compact_->GetBarrier().Pass(self);
4365   }
4366 
4367  private:
4368   MarkCompact* const mark_compact_;
4369 };
4370 
4371 void MarkCompact::MarkRootsCheckpoint(Thread* self, Runtime* runtime) {
4372   // We revoke TLABs later during the paused round of marking.
4373   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4374   CheckpointMarkThreadRoots check_point(this);
4375   ThreadList* thread_list = runtime->GetThreadList();
4376   gc_barrier_.Init(self, 0);
4377   // Request that the checkpoint be run on all threads, returning a count of the
4378   // threads that must pass through the barrier, including self.
4379   size_t barrier_count = thread_list->RunCheckpoint(&check_point);
4380   // Release the locks, then wait for all mutator threads to pass the barrier.
4381   // If there are no threads to wait for, all the checkpoint functions have
4382   // already finished, so there is no need to release the locks.
4383   if (barrier_count == 0) {
4384     return;
4385   }
4386   Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
4387   Locks::mutator_lock_->SharedUnlock(self);
4388   {
4389     ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
4390     gc_barrier_.Increment(self, barrier_count);
4391   }
4392   Locks::mutator_lock_->SharedLock(self);
4393   Locks::heap_bitmap_lock_->ExclusiveLock(self);
4394 }
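
// Note on the handshake above (explanatory, based only on the calls visible in
// this file): the barrier is initialized to zero, RunCheckpoint() returns how
// many threads will execute the closure, and each execution of
// CheckpointMarkThreadRoots::Run() ends with GetBarrier().Pass(self). In
// outline:
//
//   gc_barrier_.Init(self, 0);                        // no passes recorded yet
//   size_t n = thread_list->RunCheckpoint(&closure);  // closure calls Pass()
//   if (n > 0) {
//     gc_barrier_.Increment(self, n);                 // block until n Pass() calls
//   }
//
// The heap-bitmap and mutator locks are released before the wait so they are
// not held across a potentially long blocking operation, and are reacquired
// afterwards in the original order.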
4395 
4396 void MarkCompact::MarkNonThreadRoots(Runtime* runtime) {
4397   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4398   runtime->VisitNonThreadRoots(this);
4399 }
4400 
4401 void MarkCompact::MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime) {
4402   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4403   runtime->VisitConcurrentRoots(this, flags);
4404 }
4405 
4406 void MarkCompact::RevokeAllThreadLocalBuffers() {
4407   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4408   bump_pointer_space_->RevokeAllThreadLocalBuffers();
4409 }
4410 
4411 class MarkCompact::ScanObjectVisitor {
4412  public:
4413   explicit ScanObjectVisitor(MarkCompact* const mark_compact) ALWAYS_INLINE
4414       : mark_compact_(mark_compact) {}
4415 
4416   void operator()(ObjPtr<mirror::Object> obj) const
4417       ALWAYS_INLINE
4418       REQUIRES(Locks::heap_bitmap_lock_)
4419       REQUIRES_SHARED(Locks::mutator_lock_) {
4420     mark_compact_->ScanObject</*kUpdateLiveWords*/ false>(obj.Ptr());
4421   }
4422 
4423  private:
4424   MarkCompact* const mark_compact_;
4425 };
4426 
4427 void MarkCompact::UpdateAndMarkModUnion() {
4428   accounting::CardTable* const card_table = heap_->GetCardTable();
4429   for (const auto& space : immune_spaces_.GetSpaces()) {
4430     const char* name = space->IsZygoteSpace()
4431         ? "UpdateAndMarkZygoteModUnionTable"
4432         : "UpdateAndMarkImageModUnionTable";
4433     DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
4434     TimingLogger::ScopedTiming t(name, GetTimings());
4435     accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
4436     if (table != nullptr) {
4437       // UpdateAndMarkReferences() doesn't visit Reference-type objects. But
4438       // that's fine because these objects are immutable enough (referent can
4439       // only be cleared) and hence the only referents they can have are intra-space.
4440       table->UpdateAndMarkReferences(this);
4441     } else {
4442       // No mod-union table, scan all dirty/aged cards in the corresponding
4443       // card-table. This can only occur for app images.
4444       card_table->Scan</*kClearCard*/ false>(space->GetMarkBitmap(),
4445                                              space->Begin(),
4446                                              space->End(),
4447                                              ScanObjectVisitor(this),
4448                                              gc::accounting::CardTable::kCardAged);
4449     }
4450   }
4451 }
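
// Background note (general GC terminology, hedged): a mod-union table
// remembers which portions of an immune space have been modified since the
// space became immune, i.e. where references into the collected spaces may
// have been written. UpdateAndMarkReferences() therefore only revisits those
// remembered references and marks their targets, instead of re-scanning the
// entire zygote or image space; app-image spaces without such a table fall
// back to scanning aged cards directly, as done above.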
4452 
4453 void MarkCompact::MarkReachableObjects() {
4454   UpdateAndMarkModUnion();
4455   // Recursively mark all the non-image bits set in the mark bitmap.
4456   ProcessMarkStack();
4457 }
4458 
4459 void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) {
4460   accounting::CardTable* card_table = heap_->GetCardTable();
4461   for (const auto& space : heap_->GetContinuousSpaces()) {
4462     const char* name = nullptr;
4463     switch (space->GetGcRetentionPolicy()) {
4464     case space::kGcRetentionPolicyNeverCollect:
4465       name = paused ? "(Paused)ScanGrayImmuneSpaceObjects" : "ScanGrayImmuneSpaceObjects";
4466       break;
4467     case space::kGcRetentionPolicyFullCollect:
4468       name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects";
4469       break;
4470     case space::kGcRetentionPolicyAlwaysCollect:
4471       name = paused ? "(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects";
4472       break;
4473     }
4474     TimingLogger::ScopedTiming t(name, GetTimings());
4475     card_table->Scan</*kClearCard*/ false>(
4476         space->GetMarkBitmap(), space->Begin(), space->End(), ScanObjectVisitor(this), minimum_age);
4477   }
4478 }
4479 
4480 void MarkCompact::RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) {
4481   ScanDirtyObjects(paused, minimum_age);
4482   ProcessMarkStack();
4483 }
4484 
4485 void MarkCompact::MarkRoots(VisitRootFlags flags) {
4486   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4487   Runtime* runtime = Runtime::Current();
4488   // Make sure that the checkpoint which collects the stack roots is the first
4489   // one capturing GC-roots. As this one is supposed to find the address,
4490   // everything allocated after that (during this marking phase) will be
4491   // considered 'marked'.
4492   MarkRootsCheckpoint(thread_running_gc_, runtime);
4493   MarkNonThreadRoots(runtime);
4494   MarkConcurrentRoots(flags, runtime);
4495 }
4496 
4497 void MarkCompact::PreCleanCards() {
4498   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4499   CHECK(!Locks::mutator_lock_->IsExclusiveHeld(thread_running_gc_));
4500   // Age the card-table before the thread-stack scanning checkpoint in MarkRoots()
4501   // as this ensures that there are no in-progress write barriers which started
4502   // prior to aging the card-table.
4503   PrepareCardTableForMarking(/*clear_alloc_space_cards*/ false);
4504   MarkRoots(static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots));
4505   RecursiveMarkDirtyObjects(/*paused*/ false, accounting::CardTable::kCardDirty - 1);
4506 }
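
// Note on the minimum_age used above (assumption based on the card constants
// referenced in this file): kCardDirty - 1 corresponds to "aged" cards, so the
// recursive pass re-scans both cards that PrepareCardTableForMarking() aged
// and cards that mutators dirtied again afterwards, which is what lets this
// pre-cleaning round shrink the amount of work left for the final pause.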
4507 
4508 // In a concurrent marking algorithm, if we are not using a write/read barrier,
4509 // as in this case, then we need a stop-the-world (STW) round at the end to mark
4510 // objects which were written into while concurrent marking was in
4511 // progress.
4512 // In order to minimize the pause time, we could take one of two approaches:
4513 // 1. Keep repeating concurrent marking of dirty cards until the time spent goes
4514 // below a threshold.
4515 // 2. Do two rounds concurrently and then attempt a paused one. If we find
4516 // that it's taking too long, then resume the mutators and retry.
4517 //
4518 // Given the non-trivial fixed overhead of running a round (card-table and root
4519 // scan), it might be better to go with approach 2.
4520 void MarkCompact::MarkingPhase() {
4521   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4522   DCHECK_EQ(thread_running_gc_, Thread::Current());
4523   WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
4524   MaybeClampGcStructures();
4525   PrepareCardTableForMarking(/*clear_alloc_space_cards*/ true);
4526   MarkZygoteLargeObjects();
4527   MarkRoots(
4528         static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots));
4529   MarkReachableObjects();
4530   // Pre-clean dirtied cards to reduce pauses.
4531   PreCleanCards();
4532 
4533   // Set up reference processing and forward soft references once before enabling
4534   // the slow path (in MarkingPause).
4535   ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor();
4536   bool clear_soft_references = GetCurrentIteration()->GetClearSoftReferences();
4537   rp->Setup(thread_running_gc_, this, /*concurrent=*/ true, clear_soft_references);
4538   if (!clear_soft_references) {
4539     // Forward as many SoftReferences as possible before inhibiting reference access.
4540     rp->ForwardSoftReferences(GetTimings());
4541   }
4542 }
4543 
4544 class MarkCompact::RefFieldsVisitor {
4545  public:
4546   ALWAYS_INLINE explicit RefFieldsVisitor(MarkCompact* const mark_compact)
4547     : mark_compact_(mark_compact) {}
4548 
4549   ALWAYS_INLINE void operator()(mirror::Object* obj,
4550                                 MemberOffset offset,
4551                                 [[maybe_unused]] bool is_static) const
4552       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4553     if (kCheckLocks) {
4554       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
4555       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
4556     }
4557     mark_compact_->MarkObject(obj->GetFieldObject<mirror::Object>(offset), obj, offset);
4558   }
4559 
4560   void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const ALWAYS_INLINE
4561       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4562     mark_compact_->DelayReferenceReferent(klass, ref);
4563   }
4564 
4565   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
4566       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4567     if (!root->IsNull()) {
4568       VisitRoot(root);
4569     }
4570   }
4571 
4572   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
4573       REQUIRES(Locks::heap_bitmap_lock_)
4574       REQUIRES_SHARED(Locks::mutator_lock_) {
4575     if (kCheckLocks) {
4576       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
4577       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
4578     }
4579     mark_compact_->MarkObject(root->AsMirrorPtr());
4580   }
4581 
4582  private:
4583   MarkCompact* const mark_compact_;
4584 };
4585 
4586 template <size_t kAlignment>
4587 size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t chunk_idx) const {
4588   const size_t index = chunk_idx * kBitmapWordsPerVectorWord;
4589   size_t words = 0;
4590   for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) {
4591     words += POPCOUNT(Bitmap::Begin()[index + i]);
4592   }
4593   return words * kAlignment;
4594 }
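
// Worked example (illustrative constants): with kAlignment == 8 and
// kBitmapWordsPerVectorWord == 4, if the four bitmap words of chunk
// `chunk_idx` have popcounts 3, 0, 5 and 1, then LiveBytesInBitmapWord()
// returns (3 + 0 + 5 + 1) * 8 == 72 live bytes for that chunk.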
4595 
4596 void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) {
4597   DCHECK(obj != nullptr);
4598   DCHECK_EQ(obj_size, obj->SizeOf<kDefaultVerifyFlags>());
4599   uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj);
4600   UpdateClassAfterObjectMap(obj);
4601   size_t size = RoundUp(obj_size, kAlignment);
4602   uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size);
4603   size_t chunk_idx = (obj_begin - live_words_bitmap_->Begin()) / kOffsetChunkSize;
4604   // Compute the bit-index within the chunk-info vector word.
4605   bit_index %= kBitsPerVectorWord;
4606   size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment);
4607 
4608   chunk_info_vec_[chunk_idx++] += first_chunk_portion;
4609   DCHECK_LE(first_chunk_portion, size);
4610   for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) {
4611     DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u);
4612     chunk_info_vec_[chunk_idx++] = kOffsetChunkSize;
4613   }
4614   chunk_info_vec_[chunk_idx] += size;
4615   freed_objects_--;
4616 }
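
// Worked example for the chunk accounting above (illustrative constants,
// assuming kOffsetChunkSize == kBitsPerVectorWord * kAlignment == 32 * 8 ==
// 256): for an object whose rounded size is 600 bytes and whose first live
// word falls at bit_index 20 of its chunk, first_chunk_portion ==
// min(600, (32 - 20) * 8) == 96. The current chunk therefore gets +96, the
// next (fully covered) chunk gets +256, and the final chunk gets the
// remaining +248, with 96 + 256 + 248 == 600.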
4617 
4618 template <bool kUpdateLiveWords>
4619 void MarkCompact::ScanObject(mirror::Object* obj) {
4620   // The size of `obj` is used both here (to update `bytes_scanned_`) and in
4621   // `UpdateLivenessInfo`. As fetching this value can be expensive, do it once
4622   // here and pass that information to `UpdateLivenessInfo`.
4623   size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
4624   bytes_scanned_ += obj_size;
4625 
4626   RefFieldsVisitor visitor(this);
4627   DCHECK(IsMarked(obj)) << "Scanning unmarked object " << obj << "\n" << heap_->DumpSpaces();
4628   if (kUpdateLiveWords && HasAddress(obj)) {
4629     UpdateLivenessInfo(obj, obj_size);
4630   }
4631   obj->VisitReferences(visitor, visitor);
4632 }
4633 
4634 // Scan anything that's on the mark stack.
4635 void MarkCompact::ProcessMarkStack() {
4636   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4637   // TODO: try prefetch like in CMS
4638   while (!mark_stack_->IsEmpty()) {
4639     mirror::Object* obj = mark_stack_->PopBack();
4640     DCHECK(obj != nullptr);
4641     ScanObject</*kUpdateLiveWords*/ true>(obj);
4642   }
4643 }
4644 
4645 void MarkCompact::ExpandMarkStack() {
4646   const size_t new_size = mark_stack_->Capacity() * 2;
4647   std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(),
4648                                                    mark_stack_->End());
4649   mark_stack_->Resize(new_size);
4650   for (auto& ref : temp) {
4651     mark_stack_->PushBack(ref.AsMirrorPtr());
4652   }
4653   DCHECK(!mark_stack_->IsFull());
4654 }
4655 
4656 inline void MarkCompact::PushOnMarkStack(mirror::Object* obj) {
4657   if (UNLIKELY(mark_stack_->IsFull())) {
4658     ExpandMarkStack();
4659   }
4660   mark_stack_->PushBack(obj);
4661 }
4662 
4663 inline void MarkCompact::MarkObjectNonNull(mirror::Object* obj,
4664                                            mirror::Object* holder,
4665                                            MemberOffset offset) {
4666   DCHECK(obj != nullptr);
4667   if (MarkObjectNonNullNoPush</*kParallel*/false>(obj, holder, offset)) {
4668     PushOnMarkStack(obj);
4669   }
4670 }
4671 
4672 template <bool kParallel>
4673 inline bool MarkCompact::MarkObjectNonNullNoPush(mirror::Object* obj,
4674                                                  mirror::Object* holder,
4675                                                  MemberOffset offset) {
4676   // We expect most of the references to be in bump-pointer space, so try that
4677   // first to keep the cost of this function minimal.
4678   if (LIKELY(HasAddress(obj))) {
4679     return kParallel ? !moving_space_bitmap_->AtomicTestAndSet(obj)
4680                      : !moving_space_bitmap_->Set(obj);
4681   } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4682     return kParallel ? !non_moving_space_bitmap_->AtomicTestAndSet(obj)
4683                      : !non_moving_space_bitmap_->Set(obj);
4684   } else if (immune_spaces_.ContainsObject(obj)) {
4685     DCHECK(IsMarked(obj) != nullptr);
4686     return false;
4687   } else {
4688     // Must be a large-object space, otherwise it's a case of heap corruption.
4689     if (!IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment())) {
4690       // Objects in large-object space are aligned to the large-object alignment.
4691       // So if we have an object which doesn't belong to any space and is not
4692       // aligned to that alignment either, then it's memory corruption.
4693       // TODO: implement protect/unprotect in bump-pointer space.
4694       heap_->GetVerification()->LogHeapCorruption(holder, offset, obj, /*fatal*/ true);
4695     }
4696     DCHECK_NE(heap_->GetLargeObjectsSpace(), nullptr)
4697         << "ref=" << obj
4698         << " doesn't belong to any of the spaces and large object space doesn't exist";
4699     accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4700     DCHECK(los_bitmap->HasAddress(obj));
4701     if (kParallel) {
4702       los_bitmap->AtomicTestAndSet(obj);
4703     } else {
4704       los_bitmap->Set(obj);
4705     }
4706     // We only have strings and primitive arrays in large-object space, so there
4707     // is no reason to push the object onto the mark-stack.
4708     DCHECK(obj->IsString() || (obj->IsArrayInstance() && !obj->IsObjectArray()));
4709     return false;
4710   }
4711 }
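
// Note (assumption from the usage above): Set() and AtomicTestAndSet() return
// whether the mark bit was already set, so `!...Set(obj)` is true only for the
// caller that flips the bit from 0 to 1. In the kParallel case this ensures
// that each newly marked object is pushed onto the mark stack by exactly one
// worker thread, even when several threads race to mark the same object.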
4712 
4713 inline void MarkCompact::MarkObject(mirror::Object* obj,
4714                                     mirror::Object* holder,
4715                                     MemberOffset offset) {
4716   if (obj != nullptr) {
4717     MarkObjectNonNull(obj, holder, offset);
4718   }
4719 }
4720 
4721 mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) {
4722   MarkObject(obj, nullptr, MemberOffset(0));
4723   return obj;
4724 }
4725 
4726 void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
4727                                     [[maybe_unused]] bool do_atomic_update) {
4728   MarkObject(obj->AsMirrorPtr(), nullptr, MemberOffset(0));
4729 }
4730 
4731 void MarkCompact::VisitRoots(mirror::Object*** roots,
4732                              size_t count,
4733                              const RootInfo& info) {
4734   if (compacting_) {
4735     uint8_t* moving_space_begin = moving_space_begin_;
4736     uint8_t* moving_space_end = moving_space_end_;
4737     for (size_t i = 0; i < count; ++i) {
4738       UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
4739     }
4740   } else {
4741     for (size_t i = 0; i < count; ++i) {
4742       MarkObjectNonNull(*roots[i]);
4743     }
4744   }
4745 }
4746 
4747 void MarkCompact::VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
4748                              size_t count,
4749                              const RootInfo& info) {
4750   // TODO: do we need to check if the root is null or not?
4751   if (compacting_) {
4752     uint8_t* moving_space_begin = moving_space_begin_;
4753     uint8_t* moving_space_end = moving_space_end_;
4754     for (size_t i = 0; i < count; ++i) {
4755       UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
4756     }
4757   } else {
4758     for (size_t i = 0; i < count; ++i) {
4759       MarkObjectNonNull(roots[i]->AsMirrorPtr());
4760     }
4761   }
4762 }
4763 
4764 mirror::Object* MarkCompact::IsMarked(mirror::Object* obj) {
4765   if (HasAddress(obj)) {
4766     const bool is_black = reinterpret_cast<uint8_t*>(obj) >= black_allocations_begin_;
4767     if (compacting_) {
4768       if (is_black) {
4769         return PostCompactBlackObjAddr(obj);
4770       } else if (live_words_bitmap_->Test(obj)) {
4771         return PostCompactOldObjAddr(obj);
4772       } else {
4773         return nullptr;
4774       }
4775     }
4776     return (is_black || moving_space_bitmap_->Test(obj)) ? obj : nullptr;
4777   } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4778     return non_moving_space_bitmap_->Test(obj) ? obj : nullptr;
4779   } else if (immune_spaces_.ContainsObject(obj)) {
4780     return obj;
4781   } else {
4782     DCHECK(heap_->GetLargeObjectsSpace())
4783         << "ref=" << obj
4784         << " doesn't belong to any of the spaces and large object space doesn't exist";
4785     accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4786     if (los_bitmap->HasAddress(obj)) {
4787       DCHECK(IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment()));
4788       return los_bitmap->Test(obj) ? obj : nullptr;
4789     } else {
4790       // The given obj is not in any of the known spaces, so return null. This
4791       // could happen, for instance, in interpreter caches wherein a concurrent
4792       // update to the cache could result in obj being a non-reference. This is
4793       // tolerable because SweepInterpreterCaches only updates the cache if the
4794       // given object has moved, which can't be the case for a non-reference.
4795       return nullptr;
4796     }
4797   }
4798 }
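
// Note (explanatory): while compacting_ is true, IsMarked() also acts as a
// forwarding-address lookup for the moving space: marked pre-existing objects
// resolve to PostCompactOldObjAddr() and black-allocated objects to
// PostCompactBlackObjAddr(), so callers observe the object's post-compaction
// location rather than its current one.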
4799 
4800 bool MarkCompact::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
4801                                               [[maybe_unused]] bool do_atomic_update) {
4802   mirror::Object* ref = obj->AsMirrorPtr();
4803   if (ref == nullptr) {
4804     return true;
4805   }
4806   return IsMarked(ref) != nullptr;
4807 }
4808 
4809 // Process the 'referent' field in a java.lang.ref.Reference. If the referent
4810 // has not yet been marked, put it on the appropriate list in the heap for later
4811 // processing.
4812 void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
4813                                          ObjPtr<mirror::Reference> ref) {
4814   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this);
4815 }
4816 
4817 void MarkCompact::FinishPhase() {
4818   GetCurrentIteration()->SetScannedBytes(bytes_scanned_);
4819   bool is_zygote = Runtime::Current()->IsZygote();
4820   compacting_ = false;
4821   minor_fault_initialized_ = !is_zygote && uffd_minor_fault_supported_;
4822   // Madvise compaction buffers. When using the threaded implementation, skip the
4823   // first page, which is used by the gc-thread for the next iteration. Otherwise,
4824   // we get into a deadlock due to a userfault on it in the next iteration. This
4825   // page is not consuming any physical memory because we already madvised it
4826   // above and then triggered a read userfault, which maps in a special zero-page.
4827   if (use_uffd_sigbus_ || !minor_fault_initialized_ || !shadow_to_space_map_.IsValid() ||
4828       shadow_to_space_map_.Size() < (moving_first_objs_count_ + black_page_count_) * gPageSize) {
4829     size_t adjustment = use_uffd_sigbus_ ? 0 : gPageSize;
4830     ZeroAndReleaseMemory(compaction_buffers_map_.Begin() + adjustment,
4831                          compaction_buffers_map_.Size() - adjustment);
4832   } else if (shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) {
4833     // Now that we are going to use minor-faults from the next GC cycle, we can
4834     // unmap the buffers used by the worker threads.
4835     compaction_buffers_map_.SetSize(gPageSize);
4836   }
4837   info_map_.MadviseDontNeedAndZero();
4838   live_words_bitmap_->ClearBitmap();
4839   // TODO: We can clear this bitmap right before compaction pause. But in that
4840   // case we need to ensure that we don't assert on this bitmap afterwards.
4841   // Also, we would still need to clear it here again as we may have to use the
4842   // bitmap for black-allocations (see UpdateMovingSpaceBlackAllocations()).
4843   moving_space_bitmap_->Clear();
4844 
4845   if (UNLIKELY(is_zygote && IsValidFd(uffd_))) {
4846     heap_->DeleteThreadPool();
4847     // This unregisters all ranges as a side-effect.
4848     close(uffd_);
4849     uffd_ = kFdUnused;
4850     uffd_initialized_ = false;
4851   }
4852   CHECK(mark_stack_->IsEmpty());  // Ensure that the mark stack is empty.
4853   mark_stack_->Reset();
4854   DCHECK_EQ(thread_running_gc_, Thread::Current());
4855   if (kIsDebugBuild) {
4856     MutexLock mu(thread_running_gc_, lock_);
4857     if (updated_roots_.get() != nullptr) {
4858       updated_roots_->clear();
4859     }
4860   }
4861   class_after_obj_ordered_map_.clear();
4862   linear_alloc_arenas_.clear();
4863   {
4864     ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
4865     WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
4866     heap_->ClearMarkedObjects();
4867   }
4868   std::swap(moving_to_space_fd_, moving_from_space_fd_);
4869   if (IsValidFd(moving_to_space_fd_)) {
4870     // Confirm that the memfd to be used for the to-space in the next GC cycle is empty.
4871     struct stat buf;
4872     DCHECK_EQ(fstat(moving_to_space_fd_, &buf), 0) << "fstat failed: " << strerror(errno);
4873     DCHECK_EQ(buf.st_blocks, 0u);
4874   }
4875   GcVisitedArenaPool* arena_pool =
4876       static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
4877   arena_pool->DeleteUnusedArenas();
4878 }
4879 
4880 }  // namespace collector
4881 }  // namespace gc
4882 }  // namespace art
4883