1 /*
2 * Copyright 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <fcntl.h>
// Glibc v2.19 doesn't include these in fcntl.h, so host builds will fail without it.
19 #if !defined(FALLOC_FL_PUNCH_HOLE) || !defined(FALLOC_FL_KEEP_SIZE)
20 #include <linux/falloc.h>
21 #endif
22 #include <linux/userfaultfd.h>
23 #include <poll.h>
24 #include <sys/ioctl.h>
25 #include <sys/mman.h>
26 #include <sys/resource.h>
27 #include <sys/stat.h>
28 #include <sys/syscall.h>
29 #include <unistd.h>
30
31 #include <fstream>
32 #include <numeric>
33 #include <string>
34 #include <string_view>
35 #include <vector>
36
37 #include "android-base/file.h"
38 #include "android-base/parsebool.h"
39 #include "android-base/parseint.h"
40 #include "android-base/properties.h"
41 #include "android-base/strings.h"
42 #include "base/file_utils.h"
43 #include "base/memfd.h"
44 #include "base/quasi_atomic.h"
45 #include "base/systrace.h"
46 #include "base/utils.h"
47 #include "gc/accounting/mod_union_table-inl.h"
48 #include "gc/collector_type.h"
49 #include "gc/reference_processor.h"
50 #include "gc/space/bump_pointer_space.h"
51 #include "gc/task_processor.h"
52 #include "gc/verification-inl.h"
53 #include "jit/jit_code_cache.h"
54 #include "mark_compact-inl.h"
55 #include "mirror/object-refvisitor-inl.h"
56 #include "read_barrier_config.h"
57 #include "scoped_thread_state_change-inl.h"
58 #include "sigchain.h"
59 #include "thread_list.h"
60
61 #ifdef ART_TARGET_ANDROID
62 #include "android-modules-utils/sdk_level.h"
63 #include "com_android_art.h"
64 #endif
65
66 #ifndef __BIONIC__
67 #ifndef MREMAP_DONTUNMAP
68 #define MREMAP_DONTUNMAP 4
69 #endif
70 #ifndef MAP_FIXED_NOREPLACE
71 #define MAP_FIXED_NOREPLACE 0x100000
72 #endif
73 #endif // __BIONIC__
74
75 // See aosp/2996596 for where these values came from.
76 #ifndef UFFDIO_COPY_MODE_MMAP_TRYLOCK
77 #define UFFDIO_COPY_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
78 #endif
79 #ifndef UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK
80 #define UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
81 #endif
82 #ifdef ART_TARGET_ANDROID
83 namespace {
84
85 using ::android::base::GetBoolProperty;
86 using ::android::base::ParseBool;
87 using ::android::base::ParseBoolResult;
88 using ::android::modules::sdklevel::IsAtLeastV;
89
90 } // namespace
91 #endif
92
93 namespace art HIDDEN {
94
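// Probes whether mremap() accepts MREMAP_DONTUNMAP. A minimal sketch of the
// semantics this GC relies on:
//   new_addr = mremap(old_addr, len, len, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr);
// moves the pages to new_addr while leaving old_addr mapped but empty, which
// is what lets the compaction pause turn the to-space into the from-space and
// register the emptied range with userfaultfd.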
static bool HaveMremapDontunmap() {
96 const size_t page_size = GetPageSizeSlow();
97 void* old = mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
98 CHECK_NE(old, MAP_FAILED);
99 void* addr = mremap(old, page_size, page_size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr);
100 CHECK_EQ(munmap(old, page_size), 0);
101 if (addr != MAP_FAILED) {
102 CHECK_EQ(munmap(addr, page_size), 0);
103 return true;
104 } else {
105 return false;
106 }
107 }
108
109 static bool gUffdSupportsMmapTrylock = false;
// We require the MREMAP_DONTUNMAP functionality of the mremap syscall, which
// was introduced in kernel version 5.13 and has been backported to GKI kernels.
112 static bool gHaveMremapDontunmap = IsKernelVersionAtLeast(5, 13) || HaveMremapDontunmap();
113 // Bitmap of features supported by userfaultfd. This is obtained via uffd API ioctl.
114 static uint64_t gUffdFeatures = 0;
// Both missing and minor faults on shmem are needed only for minor-fault mode.
116 static constexpr uint64_t kUffdFeaturesForMinorFault =
117 UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM;
118 static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS;
119
// We consider the SIGBUS feature necessary for enabling this GC, as it is
// superior to the threading-based implementation with respect to jank.
// However, since the latter is already implemented, we allow choosing either
// of the two at boot time (in the constructor below) for testing purposes.
// We may want the minor-fault feature in the future to make jit-code-cache
// updates concurrent, as the code cache uses shmem.
bool KernelSupportsUffd() {
127 #ifdef __linux__
128 if (gHaveMremapDontunmap) {
129 int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
130 // On non-android devices we may not have the kernel patches that restrict
131 // userfaultfd to user mode. But that is not a security concern as we are
132 // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
133 if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) {
134 fd = syscall(__NR_userfaultfd, O_CLOEXEC);
135 }
136 if (fd >= 0) {
      // We are only fetching the available features, which are returned by the
      // ioctl.
139 struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
140 CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno);
141 gUffdFeatures = api.features;
      // MMAP_TRYLOCK is available only in the 5.10 and 5.15 GKI kernels. Higher
      // versions have per-VMA locks, and lower ones don't support userfaultfd.
145 if (kIsTargetAndroid && !IsKernelVersionAtLeast(5, 16)) {
146 // Check if MMAP_TRYLOCK feature is supported
147 const size_t page_size = GetPageSizeSlow();
148 void* mem =
149 mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
150 CHECK_NE(mem, MAP_FAILED) << " errno: " << errno;
151
152 struct uffdio_zeropage uffd_zeropage;
153 uffd_zeropage.mode = UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK;
154 uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(mem);
155 uffd_zeropage.range.len = page_size;
156 uffd_zeropage.zeropage = 0;
157 // The ioctl will definitely fail as mem is not registered with uffd.
158 CHECK_EQ(ioctl(fd, UFFDIO_ZEROPAGE, &uffd_zeropage), -1);
        // uffd ioctls return EINVAL for several reasons. By ensuring proper
        // alignment of 'mem' and 'len', we make sure that the only way this
        // ioctl can fail with EINVAL, without updating uffd_zeropage.zeropage
        // (with an error), is if `trylock` isn't available.
163 if (uffd_zeropage.zeropage == 0 && errno == EINVAL) {
164 LOG(INFO) << "MMAP_TRYLOCK is not supported in uffd addr:" << mem
165 << " page-size:" << page_size;
166 } else {
167 gUffdSupportsMmapTrylock = true;
168 LOG(INFO) << "MMAP_TRYLOCK is supported in uffd errno:" << errno << " addr:" << mem
169 << " size:" << page_size;
170 }
171 munmap(mem, page_size);
172 }
173 close(fd);
      // At a minimum, we need the SIGBUS feature in order to use userfaultfd.
175 return (api.features & kUffdFeaturesForSigbus) == kUffdFeaturesForSigbus;
176 }
177 }
178 #endif
179 return false;
180 }
181
182 // The other cases are defined as constexpr in runtime/read_barrier_config.h
183 #if !defined(ART_FORCE_USE_READ_BARRIER) && defined(ART_USE_READ_BARRIER)
// Returns the collector type requested on the cmdline.
static gc::CollectorType FetchCmdlineGcType() {
186 std::string argv;
187 gc::CollectorType gc_type = gc::CollectorType::kCollectorTypeNone;
188 if (android::base::ReadFileToString("/proc/self/cmdline", &argv)) {
    auto pos = argv.rfind("-Xgc:");
    // Guard against the cmdline not containing a "-Xgc:" option at all.
    if (pos != std::string::npos) {
      if (argv.substr(pos + 5, 3) == "CMC") {
        gc_type = gc::CollectorType::kCollectorTypeCMC;
      } else if (argv.substr(pos + 5, 2) == "CC") {
        gc_type = gc::CollectorType::kCollectorTypeCC;
      }
    }
195 }
196 return gc_type;
197 }
198
199 #ifdef ART_TARGET_ANDROID
static int GetOverrideCacheInfoFd() {
201 std::string args_str;
202 if (!android::base::ReadFileToString("/proc/self/cmdline", &args_str)) {
203 LOG(WARNING) << "Failed to load /proc/self/cmdline";
204 return -1;
205 }
206 std::vector<std::string_view> args;
207 Split(std::string_view(args_str), /*separator=*/'\0', &args);
208 for (std::string_view arg : args) {
209 if (android::base::ConsumePrefix(&arg, "--cache-info-fd=")) { // This is a dex2oat flag.
210 int fd;
211 if (!android::base::ParseInt(std::string(arg), &fd)) {
212 LOG(ERROR) << "Failed to parse --cache-info-fd (value: '" << arg << "')";
213 return -1;
214 }
215 return fd;
216 }
217 }
218 return -1;
219 }
220
static std::unordered_map<std::string, std::string> GetCachedProperties() {
222 // For simplicity, we don't handle multiple calls because otherwise we would have to reset the fd.
223 static bool called = false;
  CHECK(!called) << "GetCachedProperties can be called only once";
225 called = true;
226
227 std::string cache_info_contents;
228 int fd = GetOverrideCacheInfoFd();
229 if (fd >= 0) {
230 if (!android::base::ReadFdToString(fd, &cache_info_contents)) {
231 PLOG(ERROR) << "Failed to read cache-info from fd " << fd;
232 return {};
233 }
234 } else {
235 std::string path = GetApexDataDalvikCacheDirectory(InstructionSet::kNone) + "/cache-info.xml";
236 if (!android::base::ReadFileToString(path, &cache_info_contents)) {
237 // If the file is not found, then we are in chroot or in a standalone runtime process (e.g.,
238 // IncidentHelper), or odsign/odrefresh failed to generate and sign the cache info. There's
239 // nothing we can do.
240 if (errno != ENOENT) {
241 PLOG(ERROR) << "Failed to read cache-info from the default path";
242 }
243 return {};
244 }
245 }
246
247 std::optional<com::android::art::CacheInfo> cache_info =
248 com::android::art::parse(cache_info_contents.c_str());
249 if (!cache_info.has_value()) {
250 // This should never happen.
251 LOG(ERROR) << "Failed to parse cache-info";
252 return {};
253 }
254 const com::android::art::KeyValuePairList* list = cache_info->getFirstSystemProperties();
255 if (list == nullptr) {
256 // This should never happen.
257 LOG(ERROR) << "Missing system properties from cache-info";
258 return {};
259 }
260 const std::vector<com::android::art::KeyValuePair>& properties = list->getItem();
261 std::unordered_map<std::string, std::string> result;
262 for (const com::android::art::KeyValuePair& pair : properties) {
263 result[pair.getK()] = pair.getV();
264 }
265 return result;
266 }
267
static bool GetCachedBoolProperty(
    const std::unordered_map<std::string, std::string>& cached_properties,
    const std::string& key,
    bool default_value) {
272 auto it = cached_properties.find(key);
273 if (it == cached_properties.end()) {
274 return default_value;
275 }
276 ParseBoolResult result = ParseBool(it->second);
277 switch (result) {
278 case ParseBoolResult::kTrue:
279 return true;
280 case ParseBoolResult::kFalse:
281 return false;
282 case ParseBoolResult::kError:
283 return default_value;
284 }
285 }
286
static bool SysPropSaysUffdGc() {
  // The phenotype flag can change at any time after boot, but it shouldn't take effect until a
  // reboot. Therefore, we read the phenotype flag from the cache info, which is generated on boot.
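  // For example (illustrative): on a V+ device with neither phenotype flag nor
  // ro.dalvik.vm.enable_uffd_gc set, this returns false; on U and below the GC
  // is enabled by default unless force_disable_uffd_gc is set.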
290 std::unordered_map<std::string, std::string> cached_properties = GetCachedProperties();
291 bool phenotype_enable = GetCachedBoolProperty(
292 cached_properties, "persist.device_config.runtime_native_boot.enable_uffd_gc_2", false);
293 bool phenotype_force_disable = GetCachedBoolProperty(
294 cached_properties, "persist.device_config.runtime_native_boot.force_disable_uffd_gc", false);
295 bool build_enable = GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false);
296 bool is_at_most_u = !IsAtLeastV();
297 return (phenotype_enable || build_enable || is_at_most_u) && !phenotype_force_disable;
298 }
299 #else
300 // Never called.
static bool SysPropSaysUffdGc() { return false; }
302 #endif
303
static bool ShouldUseUserfaultfd() {
305 static_assert(kUseBakerReadBarrier || kUseTableLookupReadBarrier);
306 #ifdef __linux__
  // Use CMC/CC if that is explicitly requested on the cmdline. Otherwise,
  // always use CC on host. On target, use CMC only if the system properties
  // say so and the kernel supports it.
310 gc::CollectorType gc_type = FetchCmdlineGcType();
311 return gc_type == gc::CollectorType::kCollectorTypeCMC ||
312 (gc_type == gc::CollectorType::kCollectorTypeNone &&
313 kIsTargetAndroid &&
314 SysPropSaysUffdGc() &&
315 KernelSupportsUffd());
316 #else
317 return false;
318 #endif
319 }
320
321 const bool gUseUserfaultfd = ShouldUseUserfaultfd();
322 const bool gUseReadBarrier = !gUseUserfaultfd;
323 #endif
324
325 namespace gc {
326 namespace collector {
327
328 // Turn off kCheckLocks when profiling the GC as it slows down the GC
329 // significantly.
330 static constexpr bool kCheckLocks = kDebugLocking;
331 static constexpr bool kVerifyRootsMarked = kIsDebugBuild;
332 // Two threads should suffice on devices.
333 static constexpr size_t kMaxNumUffdWorkers = 2;
// Number of compaction buffers reserved for mutator threads in the SIGBUS
// feature case. It's extremely unlikely that we will ever have more than this
// number of mutator threads trying to access the moving-space during one
// compaction phase.
338 static constexpr size_t kMutatorCompactionBufferCount = 2048;
339 // Minimum from-space chunk to be madvised (during concurrent compaction) in one go.
340 // Choose a reasonable size to avoid making too many batched ioctl and madvise calls.
341 static constexpr ssize_t kMinFromSpaceMadviseSize = 8 * MB;
342 // Concurrent compaction termination logic is different (and slightly more efficient) if the
343 // kernel has the fault-retry feature (allowing repeated faults on the same page), which was
344 // introduced in 5.7 (https://android-review.git.corp.google.com/c/kernel/common/+/1540088).
345 // This allows a single page fault to be handled, in turn, by each worker thread, only waking
346 // up the GC thread at the end.
347 static const bool gKernelHasFaultRetry = IsKernelVersionAtLeast(5, 7);
348
std::pair<bool, bool> MarkCompact::GetUffdAndMinorFault() {
350 bool uffd_available;
  // In most cases gUffdFeatures will already be initialized at boot time, when
  // libart is loaded. On very old kernels we may get '0' from the kernel, in
  // which case we would be doing the syscalls each time this function is
  // called. But that's a very unlikely case. There are no correctness issues,
  // as the response from the kernel never changes after boot.
356 if (UNLIKELY(gUffdFeatures == 0)) {
357 uffd_available = KernelSupportsUffd();
358 } else {
359 // We can have any uffd features only if uffd exists.
360 uffd_available = true;
361 }
362 bool minor_fault_available =
363 (gUffdFeatures & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault;
364 return std::pair<bool, bool>(uffd_available, minor_fault_available);
365 }
366
bool MarkCompact::CreateUserfaultfd(bool post_fork) {
368 if (post_fork || uffd_ == kFdUnused) {
    // Check if we have MREMAP_DONTUNMAP here for cases where
    // 'ART_USE_READ_BARRIER=false' is used. Additionally, this check ensures
    // that userfaultfd isn't used on old kernels, which would cause random
    // ioctl failures.
373 if (gHaveMremapDontunmap) {
374 // Don't use O_NONBLOCK as we rely on read waiting on uffd_ if there isn't
375 // any read event available. We don't use poll.
376 uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
377 // On non-android devices we may not have the kernel patches that restrict
378 // userfaultfd to user mode. But that is not a security concern as we are
379 // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
380 if (!kIsTargetAndroid && UNLIKELY(uffd_ == -1 && errno == EINVAL)) {
381 uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC);
382 }
383 if (UNLIKELY(uffd_ == -1)) {
384 uffd_ = kFallbackMode;
385 LOG(WARNING) << "Userfaultfd isn't supported (reason: " << strerror(errno)
386 << ") and therefore falling back to stop-the-world compaction.";
387 } else {
388 DCHECK(IsValidFd(uffd_));
389 // Initialize uffd with the features which are required and available.
390 // Using private anonymous mapping in threading mode is the default,
391 // for which we don't need to ask for any features. Note: this mode
392 // is not used in production.
393 struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
394 if (use_uffd_sigbus_) {
395 // We should add SIGBUS feature only if we plan on using it as
396 // requesting it here will mean threading mode will not work.
397 CHECK_EQ(gUffdFeatures & kUffdFeaturesForSigbus, kUffdFeaturesForSigbus);
398 api.features |= kUffdFeaturesForSigbus;
399 }
400 if (uffd_minor_fault_supported_) {
401 // NOTE: This option is currently disabled.
402 CHECK_EQ(gUffdFeatures & kUffdFeaturesForMinorFault, kUffdFeaturesForMinorFault);
403 api.features |= kUffdFeaturesForMinorFault;
404 }
405 CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0)
406 << "ioctl_userfaultfd: API: " << strerror(errno);
407 }
408 } else {
409 uffd_ = kFallbackMode;
410 }
411 }
412 uffd_initialized_ = !post_fork || uffd_ == kFallbackMode;
413 return IsValidFd(uffd_);
414 }
415
416 template <size_t kAlignment>
MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignment>::Create(
    uintptr_t begin, uintptr_t end) {
419 return static_cast<LiveWordsBitmap<kAlignment>*>(
420 MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end));
421 }
422
static bool IsSigbusFeatureAvailable() {
424 MarkCompact::GetUffdAndMinorFault();
425 return (gUffdFeatures & kUffdFeaturesForSigbus) == kUffdFeaturesForSigbus;
426 }
427
size_t MarkCompact::ComputeInfoMapSize() {
429 size_t moving_space_size = bump_pointer_space_->Capacity();
430 size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
431 size_t nr_moving_pages = DivideByPageSize(moving_space_size);
432 size_t nr_non_moving_pages = DivideByPageSize(heap_->GetNonMovingSpace()->Capacity());
433 return chunk_info_vec_size * sizeof(uint32_t) + nr_non_moving_pages * sizeof(ObjReference) +
434 nr_moving_pages * (sizeof(ObjReference) + sizeof(uint32_t) + sizeof(Atomic<uint32_t>));
435 }
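// For reference, InitializeInfoMap() below lays out the single info_map_
// allocation as follows (a sketch; the element counts follow directly from
// ComputeInfoMapSize() above):
//   chunk_info_vec_                  : (moving_space_size / kOffsetChunkSize) uint32_t entries
//   first_objs_moving_space_         : nr_moving_pages ObjReference entries
//   pre_compact_offset_moving_space_ : nr_moving_pages uint32_t entries
//   moving_pages_status_             : nr_moving_pages Atomic<uint32_t> entries
//   first_objs_non_moving_space_     : nr_non_moving_pages ObjReference entries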
436
size_t MarkCompact::InitializeInfoMap(uint8_t* p, size_t moving_space_sz) {
438 size_t nr_moving_pages = DivideByPageSize(moving_space_sz);
439
440 chunk_info_vec_ = reinterpret_cast<uint32_t*>(p);
441 vector_length_ = moving_space_sz / kOffsetChunkSize;
442 size_t total = vector_length_ * sizeof(uint32_t);
443
444 first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
445 total += nr_moving_pages * sizeof(ObjReference);
446
447 pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p + total);
448 total += nr_moving_pages * sizeof(uint32_t);
449
450 moving_pages_status_ = reinterpret_cast<Atomic<uint32_t>*>(p + total);
451 total += nr_moving_pages * sizeof(Atomic<uint32_t>);
452
453 first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
454 total += DivideByPageSize(heap_->GetNonMovingSpace()->Capacity()) * sizeof(ObjReference);
455 DCHECK_EQ(total, ComputeInfoMapSize());
456 return total;
457 }
458
MarkCompact::MarkCompact(Heap* heap)
460 : GarbageCollector(heap, "concurrent mark compact"),
461 gc_barrier_(0),
462 lock_("mark compact lock", kGenericBottomLock),
463 bump_pointer_space_(heap->GetBumpPointerSpace()),
464 moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()),
465 moving_space_begin_(bump_pointer_space_->Begin()),
466 moving_space_end_(bump_pointer_space_->Limit()),
467 moving_to_space_fd_(kFdUnused),
468 moving_from_space_fd_(kFdUnused),
469 uffd_(kFdUnused),
470 sigbus_in_progress_count_(kSigbusCounterCompactionDoneMask),
471 compaction_in_progress_count_(0),
472 thread_pool_counter_(0),
473 compacting_(false),
474 uffd_initialized_(false),
475 uffd_minor_fault_supported_(false),
476 use_uffd_sigbus_(IsSigbusFeatureAvailable()),
477 minor_fault_initialized_(false),
478 map_linear_alloc_shared_(false),
479 clamp_info_map_status_(ClampInfoStatus::kClampInfoNotDone) {
480 if (kIsDebugBuild) {
481 updated_roots_.reset(new std::unordered_set<void*>());
482 }
483 // TODO: When using minor-fault feature, the first GC after zygote-fork
484 // requires mapping the linear-alloc again with MAP_SHARED. This leaves a
485 // gap for suspended threads to access linear-alloc when it's empty (after
486 // mremap) and not yet userfaultfd registered. This cannot be fixed by merely
487 // doing uffd registration first. For now, just assert that we are not using
488 // minor-fault. Eventually, a cleanup of linear-alloc update logic to only
489 // use private anonymous would be ideal.
490 CHECK(!uffd_minor_fault_supported_);
491 uint8_t* moving_space_begin = bump_pointer_space_->Begin();
492
  // TODO: Depending on how the bump-pointer space move is implemented, if we
  // switch between two virtual memories each time, then we will have to
  // initialize live_words_bitmap_ accordingly.
496 live_words_bitmap_.reset(LiveWordsBitmap<kAlignment>::Create(
497 reinterpret_cast<uintptr_t>(moving_space_begin),
498 reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit())));
499
500 std::string err_msg;
501 size_t moving_space_size = bump_pointer_space_->Capacity();
502 {
503 // Create one MemMap for all the data structures
504 info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector",
505 ComputeInfoMapSize(),
506 PROT_READ | PROT_WRITE,
507 /*low_4gb=*/false,
508 &err_msg);
509 if (UNLIKELY(!info_map_.IsValid())) {
510 LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg;
511 } else {
512 size_t total = InitializeInfoMap(info_map_.Begin(), moving_space_size);
513 DCHECK_EQ(total, info_map_.Size());
514 }
515 }
516
517 size_t moving_space_alignment = Heap::BestPageTableAlignment(moving_space_size);
518 // The moving space is created at a fixed address, which is expected to be
519 // PMD-size aligned.
520 if (!IsAlignedParam(moving_space_begin, moving_space_alignment)) {
521 LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment)
522 << ". This can lead to longer stop-the-world pauses for compaction";
523 }
524 // NOTE: PROT_NONE is used here as these mappings are for address space reservation
525 // only and will be used only after appropriately remapping them.
526 from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space",
527 moving_space_size,
528 PROT_NONE,
529 /*low_4gb=*/kObjPtrPoisoning,
530 moving_space_alignment,
531 &err_msg);
532 if (UNLIKELY(!from_space_map_.IsValid())) {
    LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space: " << err_msg;
534 } else {
535 from_space_begin_ = from_space_map_.Begin();
536 }
537
  // In some cases (32-bit or kObjPtrPoisoning) it's too much to ask for 3
  // heap-sized mappings in low-4GB. So tolerate failure here by attempting to
  // mmap again right before the compaction pause. And if even that fails, then
  // run the GC cycle in copy-mode rather than minor-fault mode.
542 //
543 // This map doesn't have to be aligned to 2MB as we don't mremap on it.
544 if (!kObjPtrPoisoning && uffd_minor_fault_supported_) {
545 // We need this map only if minor-fault feature is supported. But in that case
546 // don't create the mapping if obj-ptr poisoning is enabled as then the mapping
547 // has to be created in low_4gb. Doing this here rather than later causes the
548 // Dex2oatImageTest.TestExtension gtest to fail in 64-bit platforms.
549 shadow_to_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact moving-space shadow",
550 moving_space_size,
551 PROT_NONE,
552 /*low_4gb=*/false,
553 &err_msg);
554 if (!shadow_to_space_map_.IsValid()) {
555 LOG(WARNING) << "Failed to allocate concurrent mark-compact moving-space shadow: " << err_msg;
556 }
557 }
558 const size_t num_pages =
559 1 + (use_uffd_sigbus_ ? kMutatorCompactionBufferCount :
560 std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers));
561 compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers",
562 gPageSize * num_pages,
563 PROT_READ | PROT_WRITE,
564 /*low_4gb=*/kObjPtrPoisoning,
565 &err_msg);
566 if (UNLIKELY(!compaction_buffers_map_.IsValid())) {
    LOG(FATAL) << "Failed to allocate concurrent mark-compact compaction buffers: " << err_msg;
568 }
569 // We also use the first page-sized buffer for the purpose of terminating concurrent compaction.
570 conc_compaction_termination_page_ = compaction_buffers_map_.Begin();
571 // Touch the page deliberately to avoid userfaults on it. We madvise it in
572 // CompactionPhase() before using it to terminate concurrent compaction.
573 ForceRead(conc_compaction_termination_page_);
574
575 // In most of the cases, we don't expect more than one LinearAlloc space.
576 linear_alloc_spaces_data_.reserve(1);
577
  // Ensure that huge pages are not used for the moving-space. That could
  // otherwise happen if THP is set to 'always', breaking our assumption that a
  // normal page is mapped when any address is accessed.
581 int ret = madvise(moving_space_begin, moving_space_size, MADV_NOHUGEPAGE);
582 // Some devices may not have THP configured in the kernel. On such devices
583 // madvise will fail with EINVAL. Obviously, on such devices this madvise is
584 // not required in the first place.
585 CHECK(ret == 0 || errno == EINVAL);
586
587 // Initialize GC metrics.
588 metrics::ArtMetrics* metrics = GetMetrics();
589 // The mark-compact collector supports only full-heap collections at the moment.
590 gc_time_histogram_ = metrics->FullGcCollectionTime();
591 metrics_gc_count_ = metrics->FullGcCount();
592 metrics_gc_count_delta_ = metrics->FullGcCountDelta();
593 gc_throughput_histogram_ = metrics->FullGcThroughput();
594 gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput();
595 gc_throughput_avg_ = metrics->FullGcThroughputAvg();
596 gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg();
597 gc_scanned_bytes_ = metrics->FullGcScannedBytes();
598 gc_scanned_bytes_delta_ = metrics->FullGcScannedBytesDelta();
599 gc_freed_bytes_ = metrics->FullGcFreedBytes();
600 gc_freed_bytes_delta_ = metrics->FullGcFreedBytesDelta();
601 gc_duration_ = metrics->FullGcDuration();
602 gc_duration_delta_ = metrics->FullGcDurationDelta();
603 are_metrics_initialized_ = true;
604 }
605
void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
607 DCHECK_ALIGNED_PARAM(begin, gPageSize);
608 DCHECK_ALIGNED_PARAM(len, gPageSize);
609 DCHECK_GE(len, Heap::GetPMDSize());
610 size_t alignment = Heap::BestPageTableAlignment(len);
611 bool is_shared = false;
612 // We use MAP_SHARED on non-zygote processes for leveraging userfaultfd's minor-fault feature.
613 if (map_linear_alloc_shared_) {
614 void* ret = mmap(begin,
615 len,
616 PROT_READ | PROT_WRITE,
617 MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
618 /*fd=*/-1,
619 /*offset=*/0);
620 CHECK_EQ(ret, begin) << "mmap failed: " << strerror(errno);
621 is_shared = true;
622 }
623 std::string err_msg;
624 MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map",
625 len,
626 PROT_NONE,
627 /*low_4gb=*/false,
628 alignment,
629 &err_msg));
630 if (!shadow.IsValid()) {
631 LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg;
632 UNREACHABLE();
633 }
634
635 MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map",
636 DivideByPageSize(len),
637 PROT_READ | PROT_WRITE,
638 /*low_4gb=*/false,
639 &err_msg));
640 if (!page_status_map.IsValid()) {
641 LOG(FATAL) << "Failed to allocate linear-alloc page-status shadow map: " << err_msg;
642 UNREACHABLE();
643 }
644 linear_alloc_spaces_data_.emplace_back(std::forward<MemMap>(shadow),
645 std::forward<MemMap>(page_status_map),
646 begin,
647 begin + len,
648 is_shared);
649 }
650
void MarkCompact::ClampGrowthLimit(size_t new_capacity) {
  // From-space is the same size as moving-space in virtual memory.
  // However, if it's in the >4GB address space, then we don't need to clamp it
  // synchronously.
655 #if defined(__LP64__)
656 constexpr bool kClampFromSpace = kObjPtrPoisoning;
657 #else
658 constexpr bool kClampFromSpace = true;
659 #endif
660 size_t old_capacity = bump_pointer_space_->Capacity();
661 new_capacity = bump_pointer_space_->ClampGrowthLimit(new_capacity);
662 if (new_capacity < old_capacity) {
663 CHECK(from_space_map_.IsValid());
664 if (kClampFromSpace) {
665 from_space_map_.SetSize(new_capacity);
666 }
667 // NOTE: We usually don't use shadow_to_space_map_ and therefore the condition will
668 // mostly be false.
669 if (shadow_to_space_map_.IsValid() && shadow_to_space_map_.Size() > new_capacity) {
670 shadow_to_space_map_.SetSize(new_capacity);
671 }
672 clamp_info_map_status_ = ClampInfoStatus::kClampInfoPending;
673 }
674 CHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
675 }
676
void MarkCompact::MaybeClampGcStructures() {
678 size_t moving_space_size = bump_pointer_space_->Capacity();
679 DCHECK(thread_running_gc_ != nullptr);
680 if (UNLIKELY(clamp_info_map_status_ == ClampInfoStatus::kClampInfoPending)) {
681 CHECK(from_space_map_.IsValid());
682 if (from_space_map_.Size() > moving_space_size) {
683 from_space_map_.SetSize(moving_space_size);
684 }
685 // Bitmaps and other data structures
686 live_words_bitmap_->SetBitmapSize(moving_space_size);
687 size_t set_size = InitializeInfoMap(info_map_.Begin(), moving_space_size);
688 CHECK_LT(set_size, info_map_.Size());
689 info_map_.SetSize(set_size);
690
691 clamp_info_map_status_ = ClampInfoStatus::kClampInfoFinished;
692 }
693 }
694
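// Summary of the card handling below (derived from the branches in this
// function, with clean/dirty/aged as defined in accounting::CardTable):
//   - immune space with a mod-union table : table->ProcessCards()
//   - immune space without a table        : any non-clean card -> aged
//   - alloc spaces, clear_alloc_space_cards==true  : all cards cleared
//   - alloc spaces, clear_alloc_space_cards==false : dirty -> aged, others -> clean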
void MarkCompact::PrepareCardTableForMarking(bool clear_alloc_space_cards) {
696 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
697 accounting::CardTable* const card_table = heap_->GetCardTable();
698 // immune_spaces_ is emptied in InitializePhase() before marking starts. This
699 // function is invoked twice during marking. We only need to populate immune_spaces_
700 // once per GC cycle. And when it's done (below), all the immune spaces are
701 // added to it. We can never have partially filled immune_spaces_.
702 bool update_immune_spaces = immune_spaces_.IsEmpty();
703 // Mark all of the spaces we never collect as immune.
704 for (const auto& space : GetHeap()->GetContinuousSpaces()) {
705 if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
706 space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
707 CHECK(space->IsZygoteSpace() || space->IsImageSpace());
708 if (update_immune_spaces) {
709 immune_spaces_.AddSpace(space);
710 }
711 accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
712 if (table != nullptr) {
713 table->ProcessCards();
714 } else {
715 // Keep cards aged if we don't have a mod-union table since we need
716 // to scan them in future GCs. This case is for app images.
717 card_table->ModifyCardsAtomic(
718 space->Begin(),
719 space->End(),
720 [](uint8_t card) {
721 return (card == gc::accounting::CardTable::kCardClean)
722 ? card
723 : gc::accounting::CardTable::kCardAged;
724 },
725 /* card modified visitor */ VoidFunctor());
726 }
727 } else if (clear_alloc_space_cards) {
728 CHECK(!space->IsZygoteSpace());
729 CHECK(!space->IsImageSpace());
730 // The card-table corresponding to bump-pointer and non-moving space can
731 // be cleared, because we are going to traverse all the reachable objects
732 // in these spaces. This card-table will eventually be used to track
733 // mutations while concurrent marking is going on.
734 card_table->ClearCardRange(space->Begin(), space->Limit());
735 if (space != bump_pointer_space_) {
736 CHECK_EQ(space, heap_->GetNonMovingSpace());
737 non_moving_space_ = space;
738 non_moving_space_bitmap_ = space->GetMarkBitmap();
739 }
740 } else {
741 card_table->ModifyCardsAtomic(
742 space->Begin(),
743 space->End(),
744 [](uint8_t card) {
745 return (card == gc::accounting::CardTable::kCardDirty) ?
746 gc::accounting::CardTable::kCardAged :
747 gc::accounting::CardTable::kCardClean;
748 },
749 /* card modified visitor */ VoidFunctor());
750 }
751 }
752 }
753
void MarkCompact::MarkZygoteLargeObjects() {
755 Thread* self = thread_running_gc_;
756 DCHECK_EQ(self, Thread::Current());
757 space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
758 if (los != nullptr) {
759 // Pick the current live bitmap (mark bitmap if swapped).
760 accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
761 accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
762 // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
763 std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
764 live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
765 reinterpret_cast<uintptr_t>(range.second),
766 [mark_bitmap, los, self](mirror::Object* obj)
767 REQUIRES(Locks::heap_bitmap_lock_)
768 REQUIRES_SHARED(Locks::mutator_lock_) {
769 if (los->IsZygoteLargeObject(self, obj)) {
770 mark_bitmap->Set(obj);
771 }
772 });
773 }
774 }
775
void MarkCompact::InitializePhase() {
777 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
778 mark_stack_ = heap_->GetMarkStack();
779 CHECK(mark_stack_->IsEmpty());
780 immune_spaces_.Reset();
781 moving_first_objs_count_ = 0;
782 non_moving_first_objs_count_ = 0;
783 black_page_count_ = 0;
784 bytes_scanned_ = 0;
785 freed_objects_ = 0;
786 // The first buffer is used by gc-thread.
787 compaction_buffer_counter_.store(1, std::memory_order_relaxed);
788 from_space_slide_diff_ = from_space_begin_ - bump_pointer_space_->Begin();
789 black_allocations_begin_ = bump_pointer_space_->Limit();
790 CHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
791 moving_space_end_ = bump_pointer_space_->Limit();
792 walk_super_class_cache_ = nullptr;
793 // TODO: Would it suffice to read it once in the constructor, which is called
794 // in zygote process?
795 pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
796 }
797
798 class MarkCompact::ThreadFlipVisitor : public Closure {
799 public:
  explicit ThreadFlipVisitor(MarkCompact* collector) : collector_(collector) {}
801
  void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
803 // Note: self is not necessarily equal to thread since thread may be suspended.
804 Thread* self = Thread::Current();
805 CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
806 << thread->GetState() << " thread " << thread << " self " << self;
807 thread->VisitRoots(collector_, kVisitRootFlagAllRoots);
    // The interpreter cache is thread-local, so it needs to be swept either in
    // a flip or in a stop-the-world pause.
810 CHECK(collector_->compacting_);
811 thread->SweepInterpreterCache(collector_);
812 thread->AdjustTlab(collector_->black_objs_slide_diff_);
813 }
814
815 private:
816 MarkCompact* const collector_;
817 };
818
819 class MarkCompact::FlipCallback : public Closure {
820 public:
  explicit FlipCallback(MarkCompact* collector) : collector_(collector) {}
822
  void Run([[maybe_unused]] Thread* thread) override REQUIRES(Locks::mutator_lock_) {
824 collector_->CompactionPause();
825 }
826
827 private:
828 MarkCompact* const collector_;
829 };
830
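// A rough outline of one GC cycle, as driven below: InitializePhase() ->
// concurrent MarkingPhase() -> paused MarkingPause() -> concurrent
// ReclaimPhase() + PrepareForCompaction() -> paused CompactionPause() (via the
// thread flip) -> concurrent CompactionPhase() -> FinishPhase().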
void MarkCompact::RunPhases() {
832 Thread* self = Thread::Current();
833 thread_running_gc_ = self;
834 Runtime* runtime = Runtime::Current();
835 InitializePhase();
836 GetHeap()->PreGcVerification(this);
837 {
838 ReaderMutexLock mu(self, *Locks::mutator_lock_);
839 MarkingPhase();
840 }
841 {
842 // Marking pause
843 ScopedPause pause(this);
844 MarkingPause();
845 if (kIsDebugBuild) {
846 bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked();
847 }
848 }
849 {
850 ReaderMutexLock mu(self, *Locks::mutator_lock_);
851 ReclaimPhase();
852 PrepareForCompaction();
853 }
854 if (uffd_ != kFallbackMode && !use_uffd_sigbus_) {
855 heap_->GetThreadPool()->WaitForWorkersToBeCreated();
856 }
857
858 {
859 // Compaction pause
860 ThreadFlipVisitor visitor(this);
861 FlipCallback callback(this);
862 runtime->GetThreadList()->FlipThreadRoots(
863 &visitor, &callback, this, GetHeap()->GetGcPauseListener());
864 }
865
866 if (IsValidFd(uffd_)) {
867 ReaderMutexLock mu(self, *Locks::mutator_lock_);
868 CompactionPhase();
869 }
870
871 FinishPhase();
872 thread_running_gc_ = nullptr;
873 }
874
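// Computes, for every to-space page, the first live word it will contain
// (pre_compact_offset_moving_space_) and the object containing that word
// (first_objs_moving_space_). Sketch of the idea: the loop below accumulates
// per-chunk live-bytes until they exceed one page, steps back to the chunk in
// which the page boundary falls, asks the live-words bitmap for the exact live
// word at that boundary, and records the object preceding (or starting at) it.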
void MarkCompact::InitMovingSpaceFirstObjects(const size_t vec_len) {
876 // Find the first live word first.
877 size_t to_space_page_idx = 0;
878 uint32_t offset_in_chunk_word;
879 uint32_t offset;
880 mirror::Object* obj;
881 const uintptr_t heap_begin = moving_space_bitmap_->HeapBegin();
882
883 size_t chunk_idx;
884 // Find the first live word in the space
885 for (chunk_idx = 0; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) {
886 if (chunk_idx >= vec_len) {
887 // We don't have any live data on the moving-space.
888 return;
889 }
890 }
891 DCHECK_LT(chunk_idx, vec_len);
892 // Use live-words bitmap to find the first word
893 offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0);
894 offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
895 DCHECK(live_words_bitmap_->Test(offset)) << "offset=" << offset
896 << " chunk_idx=" << chunk_idx
897 << " N=0"
898 << " offset_in_word=" << offset_in_chunk_word
899 << " word=" << std::hex
900 << live_words_bitmap_->GetWord(chunk_idx);
901 // The first object doesn't require using FindPrecedingObject().
902 obj = reinterpret_cast<mirror::Object*>(heap_begin + offset * kAlignment);
903 // TODO: add a check to validate the object.
904
905 pre_compact_offset_moving_space_[to_space_page_idx] = offset;
906 first_objs_moving_space_[to_space_page_idx].Assign(obj);
907 to_space_page_idx++;
908
909 uint32_t page_live_bytes = 0;
910 while (true) {
911 for (; page_live_bytes <= gPageSize; chunk_idx++) {
912 if (chunk_idx >= vec_len) {
913 moving_first_objs_count_ = to_space_page_idx;
914 return;
915 }
916 page_live_bytes += chunk_info_vec_[chunk_idx];
917 }
918 chunk_idx--;
919 page_live_bytes -= gPageSize;
920 DCHECK_LE(page_live_bytes, kOffsetChunkSize);
921 DCHECK_LE(page_live_bytes, chunk_info_vec_[chunk_idx])
922 << " chunk_idx=" << chunk_idx
923 << " to_space_page_idx=" << to_space_page_idx
924 << " vec_len=" << vec_len;
925 DCHECK(IsAligned<kAlignment>(chunk_info_vec_[chunk_idx] - page_live_bytes));
926 offset_in_chunk_word =
927 live_words_bitmap_->FindNthLiveWordOffset(
928 chunk_idx, (chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment);
929 offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
930 DCHECK(live_words_bitmap_->Test(offset))
931 << "offset=" << offset
932 << " chunk_idx=" << chunk_idx
933 << " N=" << ((chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment)
934 << " offset_in_word=" << offset_in_chunk_word
935 << " word=" << std::hex << live_words_bitmap_->GetWord(chunk_idx);
936 // TODO: Can we optimize this for large objects? If we are continuing a
937 // large object that spans multiple pages, then we may be able to do without
938 // calling FindPrecedingObject().
939 //
    // Find the object that contains this offset; it could be an object that
    // starts at the offset itself.
942 obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
943 // TODO: add a check to validate the object.
944 pre_compact_offset_moving_space_[to_space_page_idx] = offset;
945 first_objs_moving_space_[to_space_page_idx].Assign(obj);
946 to_space_page_idx++;
947 chunk_idx++;
948 }
949 }
950
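// Records, for every non-moving-space page, the object from which reference
// visiting must start for that page: either the first object starting within
// the page, or an object from a preceding page that spills into it. Pages
// whose entry is left empty have no live objects and are skipped later.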
void MarkCompact::InitNonMovingSpaceFirstObjects() {
952 accounting::ContinuousSpaceBitmap* bitmap = non_moving_space_->GetLiveBitmap();
953 uintptr_t begin = reinterpret_cast<uintptr_t>(non_moving_space_->Begin());
954 const uintptr_t end = reinterpret_cast<uintptr_t>(non_moving_space_->End());
955 mirror::Object* prev_obj;
956 size_t page_idx;
957 {
958 // Find first live object
959 mirror::Object* obj = nullptr;
960 bitmap->VisitMarkedRange</*kVisitOnce*/ true>(begin,
961 end,
962 [&obj] (mirror::Object* o) {
963 obj = o;
964 });
965 if (obj == nullptr) {
966 // There are no live objects in the non-moving space
967 return;
968 }
969 page_idx = DivideByPageSize(reinterpret_cast<uintptr_t>(obj) - begin);
970 first_objs_non_moving_space_[page_idx++].Assign(obj);
971 prev_obj = obj;
972 }
973 // TODO: check obj is valid
974 uintptr_t prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
975 + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
976 // For every page find the object starting from which we need to call
977 // VisitReferences. It could either be an object that started on some
978 // preceding page, or some object starting within this page.
979 begin = RoundDown(reinterpret_cast<uintptr_t>(prev_obj) + gPageSize, gPageSize);
980 while (begin < end) {
    // Utilize, if any, a large object that started on some preceding page and
    // also overlaps with this page.
983 if (prev_obj != nullptr && prev_obj_end > begin) {
984 DCHECK_LT(prev_obj, reinterpret_cast<mirror::Object*>(begin));
985 first_objs_non_moving_space_[page_idx].Assign(prev_obj);
986 mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
987 if (HasAddress(klass)) {
988 LOG(WARNING) << "found inter-page object " << prev_obj
989 << " in non-moving space with klass " << klass
990 << " in moving space";
991 }
992 } else {
993 prev_obj_end = 0;
      // It's sufficient to only search for the previous object in the preceding
      // page. If no live object started in that page, and an object starting in
      // the page before that one was big enough to overlap with the current
      // page, then we wouldn't be in this else branch.
998 prev_obj = bitmap->FindPrecedingObject(begin, begin - gPageSize);
999 if (prev_obj != nullptr) {
1000 prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
1001 + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
1002 }
1003 if (prev_obj_end > begin) {
1004 mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
1005 if (HasAddress(klass)) {
1006 LOG(WARNING) << "found inter-page object " << prev_obj
1007 << " in non-moving space with klass " << klass
1008 << " in moving space";
1009 }
1010 first_objs_non_moving_space_[page_idx].Assign(prev_obj);
1011 } else {
1012 // Find the first live object in this page
1013 bitmap->VisitMarkedRange</*kVisitOnce*/ true>(
1014 begin,
1015 begin + gPageSize,
1016 [this, page_idx] (mirror::Object* obj) {
1017 first_objs_non_moving_space_[page_idx].Assign(obj);
1018 });
1019 }
1020 // An empty entry indicates that the page has no live objects and hence
1021 // can be skipped.
1022 }
1023 begin += gPageSize;
1024 page_idx++;
1025 }
1026 non_moving_first_objs_count_ = page_idx;
1027 }
1028
bool MarkCompact::CanCompactMovingSpaceWithMinorFault() {
1030 size_t min_size = (moving_first_objs_count_ + black_page_count_) * gPageSize;
1031 return minor_fault_initialized_ && shadow_to_space_map_.IsValid() &&
1032 shadow_to_space_map_.Size() >= min_size;
1033 }
1034
1035 class MarkCompact::ConcurrentCompactionGcTask : public SelfDeletingTask {
1036 public:
  explicit ConcurrentCompactionGcTask(MarkCompact* collector, size_t idx)
      : collector_(collector), index_(idx) {}
1039
  void Run([[maybe_unused]] Thread* self) override REQUIRES_SHARED(Locks::mutator_lock_) {
1041 if (collector_->CanCompactMovingSpaceWithMinorFault()) {
1042 collector_->ConcurrentCompaction<MarkCompact::kMinorFaultMode>(/*buf=*/nullptr);
1043 } else {
      // The page/buf passed to ConcurrentCompaction is used by the thread as a
      // gPageSize buffer into which objects are compacted and updated, before
      // the buf is passed to uffd ioctls.
1047 uint8_t* buf = collector_->compaction_buffers_map_.Begin() + index_ * gPageSize;
1048 collector_->ConcurrentCompaction<MarkCompact::kCopyMode>(buf);
1049 }
1050 }
1051
1052 private:
1053 MarkCompact* const collector_;
1054 size_t index_;
1055 };
1056
void MarkCompact::PrepareForCompaction() {
1058 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1059 uint8_t* space_begin = bump_pointer_space_->Begin();
1060 size_t vector_len = (black_allocations_begin_ - space_begin) / kOffsetChunkSize;
1061 DCHECK_LE(vector_len, vector_length_);
1062 for (size_t i = 0; i < vector_len; i++) {
1063 DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize);
1064 DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i));
1065 }
1066 InitMovingSpaceFirstObjects(vector_len);
1067 InitNonMovingSpaceFirstObjects();
1068
1069 // TODO: We can do a lot of neat tricks with this offset vector to tune the
1070 // compaction as we wish. Originally, the compaction algorithm slides all
1071 // live objects towards the beginning of the heap. This is nice because it
1072 // keeps the spatial locality of objects intact.
1073 // However, sometimes it's desired to compact objects in certain portions
1074 // of the heap. For instance, it is expected that, over time,
1075 // objects towards the beginning of the heap are long lived and are always
1076 // densely packed. In this case, it makes sense to only update references in
1077 // there and not try to compact it.
1078 // Furthermore, we might have some large objects and may not want to move such
1079 // objects.
1080 // We can adjust, without too much effort, the values in the chunk_info_vec_ such
1081 // that the objects in the dense beginning area aren't moved. OTOH, large
1082 // objects, which could be anywhere in the heap, could also be kept from
1083 // moving by using a similar trick. The only issue is that by doing this we will
1084 // leave an unused hole in the middle of the heap which can't be used for
1085 // allocations until we do a *full* compaction.
1086 //
  // At this point every element in chunk_info_vec_ contains the live-bytes of
  // the corresponding chunk. For old-to-new address computation we need every
  // element to reflect the total live-bytes of all preceding chunks.
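  // For illustration (hypothetical values): if the scanned entries of
  // chunk_info_vec_ are {192, 0, 320, 64}, std::exclusive_scan() below turns
  // them into {0, 192, 192, 512}, i.e. the live-bytes of all preceding chunks,
  // and 'total' ends up as the overall live-bytes, 512 + 64 = 576 here.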
1090
1091 // Live-bytes count is required to compute post_compact_end_ below.
1092 uint32_t total;
  // Update the vector one element past the heap usage, as that entry is
  // required for computing the post-compact addresses of black-allocated
  // objects.
1095 if (vector_len < vector_length_) {
1096 vector_len++;
1097 total = 0;
1098 } else {
1099 // Fetch the value stored in the last element before it gets overwritten by
1100 // std::exclusive_scan().
1101 total = chunk_info_vec_[vector_len - 1];
1102 }
1103 std::exclusive_scan(chunk_info_vec_, chunk_info_vec_ + vector_len, chunk_info_vec_, 0);
1104 total += chunk_info_vec_[vector_len - 1];
1105
1106 for (size_t i = vector_len; i < vector_length_; i++) {
1107 DCHECK_EQ(chunk_info_vec_[i], 0u);
1108 }
1109 post_compact_end_ = AlignUp(space_begin + total, gPageSize);
1110 CHECK_EQ(post_compact_end_, space_begin + moving_first_objs_count_ * gPageSize);
1111 black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_;
1112 // We shouldn't be consuming more space after compaction than pre-compaction.
1113 CHECK_GE(black_objs_slide_diff_, 0);
1114 // How do we handle compaction of heap portion used for allocations after the
1115 // marking-pause?
1116 // All allocations after the marking-pause are considered black (reachable)
1117 // for this GC cycle. However, they need not be allocated contiguously as
1118 // different mutators use TLABs. So we will compact the heap till the point
1119 // where allocations took place before the marking-pause. And everything after
1120 // that will be slid with TLAB holes, and then TLAB info in TLS will be
1121 // appropriately updated in the pre-compaction pause.
1122 // The chunk-info vector entries for the post marking-pause allocations will be
1123 // also updated in the pre-compaction pause.
1124
1125 bool is_zygote = Runtime::Current()->IsZygote();
1126 if (!uffd_initialized_ && CreateUserfaultfd(/*post_fork*/false)) {
1127 if (!use_uffd_sigbus_) {
1128 // Register the buffer that we use for terminating concurrent compaction
1129 struct uffdio_register uffd_register;
1130 uffd_register.range.start = reinterpret_cast<uintptr_t>(conc_compaction_termination_page_);
1131 uffd_register.range.len = gPageSize;
1132 uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1133 CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
1134 << "ioctl_userfaultfd: register compaction termination page: " << strerror(errno);
1135 }
1136 if (!uffd_minor_fault_supported_ && shadow_to_space_map_.IsValid()) {
1137 // A valid shadow-map for moving space is only possible if we
1138 // were able to map it in the constructor. That also means that its size
1139 // matches the moving-space.
1140 CHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity());
1141 // Release the shadow map for moving-space if we don't support minor-fault
1142 // as it's not required.
1143 shadow_to_space_map_.Reset();
1144 }
1145 }
1146 // For zygote we create the thread pool each time before starting compaction,
1147 // and get rid of it when finished. This is expected to happen rarely as
1148 // zygote spends most of the time in native fork loop.
1149 if (uffd_ != kFallbackMode) {
1150 if (!use_uffd_sigbus_) {
1151 ThreadPool* pool = heap_->GetThreadPool();
1152 if (UNLIKELY(pool == nullptr)) {
        // On devices with 2 cores, GetParallelGCThreadCount() will return 1,
        // which is the desired number of workers on such devices.
1155 heap_->CreateThreadPool(std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers));
1156 pool = heap_->GetThreadPool();
1157 }
1158 size_t num_threads = pool->GetThreadCount();
1159 thread_pool_counter_ = num_threads;
1160 for (size_t i = 0; i < num_threads; i++) {
1161 pool->AddTask(thread_running_gc_, new ConcurrentCompactionGcTask(this, i + 1));
1162 }
1163 CHECK_EQ(pool->GetTaskCount(thread_running_gc_), num_threads);
1164 }
1165 /*
1166 * Possible scenarios for mappings:
1167 * A) All zygote GCs (or if minor-fault feature isn't available): uses
1168 * uffd's copy mode
1169 * 1) For moving-space ('to' space is same as the moving-space):
1170 * a) Private-anonymous mappings for 'to' and 'from' space are created in
1171 * the constructor.
1172 * b) In the compaction pause, we mremap(dontunmap) from 'to' space to
1173 * 'from' space. This results in moving all pages to 'from' space and
1174 * emptying the 'to' space, thereby preparing it for userfaultfd
1175 * registration.
1176 *
1177 * 2) For linear-alloc space:
1178 * a) Private-anonymous mappings for the linear-alloc and its 'shadow'
1179 * are created by the arena-pool.
     *    b) In the compaction pause, we mremap(dontunmap) with a similar effect as
1181 * (A.1.b) above.
1182 *
1183 * B) First GC after zygote: uses uffd's copy-mode
1184 * 1) For moving-space:
1185 * a) If the mmap for shadow-map has been successful in the constructor,
1186 * then we remap it (mmap with MAP_FIXED) to get a shared-anonymous
1187 * mapping.
1188 * b) Else, we create two memfd and ftruncate them to the moving-space
1189 * size.
1190 * c) Same as (A.1.b)
1191 * d) If (B.1.a), then mremap(dontunmap) from shadow-map to
1192 * 'to' space. This will make both of them map to the same pages
1193 * e) If (B.1.b), then mmap with the first memfd in shared mode on the
1194 * 'to' space.
1195 * f) At the end of compaction, we will have moved the moving-space
1196 * objects to a MAP_SHARED mapping, readying it for minor-fault from next
1197 * GC cycle.
1198 *
1199 * 2) For linear-alloc space:
1200 * a) Same as (A.2.b)
1201 * b) mmap a shared-anonymous mapping onto the linear-alloc space.
1202 * c) Same as (B.1.f)
1203 *
     * C) All subsequent GCs: preferably minor-fault mode, but may also require
1205 * using copy-mode.
1206 * 1) For moving-space:
1207 * a) If the shadow-map is created and no memfd was used, then that means
1208 * we are using shared-anonymous. Therefore, mmap a shared-anonymous on
1209 * the shadow-space.
1210 * b) If the shadow-map is not mapped yet, then mmap one with a size
1211 * big enough to hold the compacted moving space. This may fail, in which
1212 * case we will use uffd's copy-mode.
1213 * c) If (b) is successful, then mmap the free memfd onto shadow-map.
1214 * d) Same as (A.1.b)
1215 * e) In compaction pause, if the shadow-map was not created, then use
1216 * copy-mode.
1217 * f) Else, if the created map is smaller than the required-size, then
1218 * use mremap (without dontunmap) to expand the size. If failed, then use
1219 * copy-mode.
1220 * g) Otherwise, same as (B.1.d) and use minor-fault mode.
1221 *
1222 * 2) For linear-alloc space:
1223 * a) Same as (A.2.b)
1224 * b) Use minor-fault mode
1225 */
1226 auto mmap_shadow_map = [this](int flags, int fd) {
1227 void* ret = mmap(shadow_to_space_map_.Begin(),
1228 shadow_to_space_map_.Size(),
1229 PROT_READ | PROT_WRITE,
1230 flags,
1231 fd,
1232 /*offset=*/0);
1233 DCHECK_NE(ret, MAP_FAILED) << "mmap for moving-space shadow failed:" << strerror(errno);
1234 };
1235 // Setup all the virtual memory ranges required for concurrent compaction.
1236 if (minor_fault_initialized_) {
1237 DCHECK(!is_zygote);
1238 if (UNLIKELY(!shadow_to_space_map_.IsValid())) {
1239 // This case happens only once on the first GC in minor-fault mode, if
1240 // we were unable to reserve shadow-map for moving-space in the
1241 // beginning.
1242 DCHECK_GE(moving_to_space_fd_, 0);
1243 // Take extra 4MB to reduce the likelihood of requiring resizing this
1244 // map in the pause due to black allocations.
1245 size_t reqd_size = std::min(moving_first_objs_count_ * gPageSize + 4 * MB,
1246 bump_pointer_space_->Capacity());
1247 // We cannot support memory-tool with shadow-map (as it requires
1248 // appending a redzone) in this case because the mapping may have to be expanded
1249 // using mremap (in KernelPreparation()), which would ignore the redzone.
1250 // MemMap::MapFile() appends a redzone, but MemMap::MapAnonymous() doesn't.
1251 std::string err_msg;
1252 shadow_to_space_map_ = MemMap::MapAnonymous("moving-space-shadow",
1253 reqd_size,
1254 PROT_NONE,
1255 /*low_4gb=*/kObjPtrPoisoning,
1256 &err_msg);
1257
1258 if (shadow_to_space_map_.IsValid()) {
1259 CHECK(!kMemoryToolAddsRedzones || shadow_to_space_map_.GetRedzoneSize() == 0u);
1260 // We want to use MemMap to get low-4GB mapping, if required, but then also
1261 // want to have its ownership as we may grow it (in
1262 // KernelPreparation()). If the ownership is not taken and we try to
1263 // resize MemMap, then it unmaps the virtual range.
1264 MemMap temp = shadow_to_space_map_.TakeReservedMemory(shadow_to_space_map_.Size(),
1265 /*reuse*/ true);
1266 std::swap(temp, shadow_to_space_map_);
1267 DCHECK(!temp.IsValid());
1268 } else {
1269 LOG(WARNING) << "Failed to create moving space's shadow map of " << PrettySize(reqd_size)
1270 << " size. " << err_msg;
1271 }
1272 }
1273
1274 if (LIKELY(shadow_to_space_map_.IsValid())) {
1275 int fd = moving_to_space_fd_;
1276 int mmap_flags = MAP_SHARED | MAP_FIXED;
1277 if (fd == kFdUnused) {
1278 // Unused moving-to-space fd means we are using anonymous shared
1279 // mapping.
1280 DCHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity());
1281 mmap_flags |= MAP_ANONYMOUS;
1282 fd = -1;
1283 }
1284 // If the map is smaller than required, then we'll do mremap in the
1285 // compaction pause to increase the size.
1286 mmap_shadow_map(mmap_flags, fd);
1287 }
1288
1289 for (auto& data : linear_alloc_spaces_data_) {
1290 DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_READ | PROT_WRITE), 0)
1291 << "mprotect failed: " << strerror(errno);
1292 }
1293 } else if (!is_zygote && uffd_minor_fault_supported_) {
1294 // First GC after zygote-fork. We will still use uffd's copy mode but will
1295 // use it to move objects to MAP_SHARED (to prepare for subsequent GCs, which
1296 // will use uffd's minor-fault feature).
1297 if (shadow_to_space_map_.IsValid() &&
1298 shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) {
1299 mmap_shadow_map(MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS, /*fd=*/-1);
1300 } else {
1301 size_t size = bump_pointer_space_->Capacity();
1302 DCHECK_EQ(moving_to_space_fd_, kFdUnused);
1303 DCHECK_EQ(moving_from_space_fd_, kFdUnused);
1304 const char* name = bump_pointer_space_->GetName();
1305 moving_to_space_fd_ = memfd_create(name, MFD_CLOEXEC);
1306 CHECK_NE(moving_to_space_fd_, -1)
1307 << "memfd_create: failed for " << name << ": " << strerror(errno);
1308 moving_from_space_fd_ = memfd_create(name, MFD_CLOEXEC);
1309 CHECK_NE(moving_from_space_fd_, -1)
1310 << "memfd_create: failed for " << name << ": " << strerror(errno);
1311
1312       // memfds are treated as files from the resource-limits point of view,
1313       // and the moving space could be several hundred MBs. So increase the
1314       // limit if it's lower than the moving-space size.
1315 bool rlimit_changed = false;
1316 rlimit rlim_read;
1317 CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlim_read), 0) << "getrlimit failed: " << strerror(errno);
1318 if (rlim_read.rlim_cur < size) {
1319 rlimit_changed = true;
1320 rlimit rlim = rlim_read;
1321 rlim.rlim_cur = size;
1322 CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim), 0) << "setrlimit failed: " << strerror(errno);
1323 }
1324
1325 // moving-space will map this fd so that we compact objects into it.
1326 int ret = ftruncate(moving_to_space_fd_, size);
1327 CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno);
1328 ret = ftruncate(moving_from_space_fd_, size);
1329 CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno);
1330
1331 if (rlimit_changed) {
1332         // Reset the rlimit to its original value.
1333 CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim_read), 0)
1334 << "setrlimit failed: " << strerror(errno);
1335 }
1336 }
1337 }
1338 }
1339 }
1340
1341 class MarkCompact::VerifyRootMarkedVisitor : public SingleRootVisitor {
1342 public:
1343   explicit VerifyRootMarkedVisitor(MarkCompact* collector) : collector_(collector) { }
1344
1345   void VisitRoot(mirror::Object* root, const RootInfo& info) override
1346 REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
1347 CHECK(collector_->IsMarked(root) != nullptr) << info.ToString();
1348 }
1349
1350 private:
1351 MarkCompact* const collector_;
1352 };
1353
1354 void MarkCompact::ReMarkRoots(Runtime* runtime) {
1355 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1356 DCHECK_EQ(thread_running_gc_, Thread::Current());
1357 Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1358 MarkNonThreadRoots(runtime);
1359 MarkConcurrentRoots(static_cast<VisitRootFlags>(kVisitRootFlagNewRoots
1360 | kVisitRootFlagStopLoggingNewRoots
1361 | kVisitRootFlagClearRootLog),
1362 runtime);
1363
1364 if (kVerifyRootsMarked) {
1365 TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
1366 VerifyRootMarkedVisitor visitor(this);
1367 runtime->VisitRoots(&visitor);
1368 }
1369 }
1370
1371 void MarkCompact::MarkingPause() {
1372 TimingLogger::ScopedTiming t("(Paused)MarkingPause", GetTimings());
1373 Runtime* runtime = Runtime::Current();
1374 Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1375 {
1376 // Handle the dirty objects as we are a concurrent GC
1377 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1378 {
1379 MutexLock mu2(thread_running_gc_, *Locks::runtime_shutdown_lock_);
1380 MutexLock mu3(thread_running_gc_, *Locks::thread_list_lock_);
1381 std::list<Thread*> thread_list = runtime->GetThreadList()->GetList();
1382 for (Thread* thread : thread_list) {
1383 thread->VisitRoots(this, static_cast<VisitRootFlags>(0));
1384 DCHECK_EQ(thread->GetThreadLocalGcBuffer(), nullptr);
1385 // Need to revoke all the thread-local allocation stacks since we will
1386 // swap the allocation stacks (below) and don't want anybody to allocate
1387 // into the live stack.
1388 thread->RevokeThreadLocalAllocationStack();
1389 bump_pointer_space_->RevokeThreadLocalBuffers(thread);
1390 }
1391 }
1392 // Fetch only the accumulated objects-allocated count as it is guaranteed to
1393 // be up-to-date after the TLAB revocation above.
1394 freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated();
1395 // Capture 'end' of moving-space at this point. Every allocation beyond this
1396 // point will be considered as black.
1397     // Align up to the page boundary so that black allocations happen from the next page
1398 // onwards. Also, it ensures that 'end' is aligned for card-table's
1399 // ClearCardRange().
1400 black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, gPageSize, heap_);
1401 DCHECK_ALIGNED_PARAM(black_allocations_begin_, gPageSize);
1402
1403 // Re-mark root set. Doesn't include thread-roots as they are already marked
1404 // above.
1405 ReMarkRoots(runtime);
1406 // Scan dirty objects.
1407 RecursiveMarkDirtyObjects(/*paused*/ true, accounting::CardTable::kCardDirty);
1408 {
1409 TimingLogger::ScopedTiming t2("SwapStacks", GetTimings());
1410 heap_->SwapStacks();
1411 live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
1412 }
1413 }
1414 // TODO: For PreSweepingGcVerification(), find correct strategy to visit/walk
1415 // objects in bump-pointer space when we have a mark-bitmap to indicate live
1416 // objects. At the same time we also need to be able to visit black allocations,
1417 // even though they are not marked in the bitmap. Without both of these we fail
1418   // pre-sweeping verification. We also leave windows open wherein a
1419   // VisitObjects/Walk on the space would either miss some objects or visit
1420   // unreachable ones. These windows exist while we are switching from the shared
1421   // mutator-lock to exclusive and vice-versa, from here until the compaction pause.
1422 // heap_->PreSweepingGcVerification(this);
1423
1424 // Disallow new system weaks to prevent a race which occurs when someone adds
1425 // a new system weak before we sweep them. Since this new system weak may not
1426 // be marked, the GC may incorrectly sweep it. This also fixes a race where
1427 // interning may attempt to return a strong reference to a string that is
1428 // about to be swept.
1429 runtime->DisallowNewSystemWeaks();
1430 // Enable the reference processing slow path, needs to be done with mutators
1431 // paused since there is no lock in the GetReferent fast path.
1432 heap_->GetReferenceProcessor()->EnableSlowPath();
1433 }
1434
1435 void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) {
1436 TimingLogger::ScopedTiming t(paused ? "(Paused)SweepSystemWeaks" : "SweepSystemWeaks",
1437 GetTimings());
1438 ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
1439 runtime->SweepSystemWeaks(this);
1440 }
1441
1442 void MarkCompact::ProcessReferences(Thread* self) {
1443 WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
1444 GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings());
1445 }
1446
1447 void MarkCompact::Sweep(bool swap_bitmaps) {
1448 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1449 // Ensure that nobody inserted objects in the live stack after we swapped the
1450 // stacks.
1451 CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
1452 {
1453 TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings());
1454 // Mark everything allocated since the last GC as live so that we can sweep
1455 // concurrently, knowing that new allocations won't be marked as live.
1456 accounting::ObjectStack* live_stack = heap_->GetLiveStack();
1457 heap_->MarkAllocStackAsLive(live_stack);
1458 live_stack->Reset();
1459 DCHECK(mark_stack_->IsEmpty());
1460 }
1461 for (const auto& space : GetHeap()->GetContinuousSpaces()) {
1462 if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_ &&
1463 !immune_spaces_.ContainsSpace(space)) {
1464 space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
1465 DCHECK(!alloc_space->IsZygoteSpace());
1466 TimingLogger::ScopedTiming split("SweepMallocSpace", GetTimings());
1467 RecordFree(alloc_space->Sweep(swap_bitmaps));
1468 }
1469 }
1470 SweepLargeObjects(swap_bitmaps);
1471 }
1472
1473 void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
1474 space::LargeObjectSpace* los = heap_->GetLargeObjectsSpace();
1475 if (los != nullptr) {
1476 TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
1477 RecordFreeLOS(los->Sweep(swap_bitmaps));
1478 }
1479 }
1480
1481 void MarkCompact::ReclaimPhase() {
1482 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1483 DCHECK(thread_running_gc_ == Thread::Current());
1484 Runtime* const runtime = Runtime::Current();
1485 // Process the references concurrently.
1486 ProcessReferences(thread_running_gc_);
1487 // TODO: Try to merge this system-weak sweeping with the one while updating
1488 // references during the compaction pause.
1489 SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ false);
1490 runtime->AllowNewSystemWeaks();
1491 // Clean up class loaders after system weaks are swept since that is how we know if class
1492 // unloading occurred.
1493 runtime->GetClassLinker()->CleanupClassLoaders();
1494 {
1495 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1496 // Reclaim unmarked objects.
1497 Sweep(false);
1498     // Swap the live and mark bitmaps for each space we modified. This is an
1499 // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
1500 // bitmaps.
1501 SwapBitmaps();
1502 // Unbind the live and mark bitmaps.
1503 GetHeap()->UnBindBitmaps();
1504 }
1505 }
1506
1507 // We want to avoid checking for every reference whether it's within the page
1508 // or not. This can be done if we know where in the page the holder object lies.
1509 // If it doesn't overlap either boundary, then we can skip the checks.
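// For illustration: callers below instantiate <kCheckBegin=false, kCheckEnd=false> for
// objects known to lie entirely within the page, kCheckBegin=true for a first object
// that may have started on a preceding page, and kCheckEnd=true for objects in the last
// stride, which may spill past the page end (see CompactPage() and SlideBlackPage()).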
1510 template <bool kCheckBegin, bool kCheckEnd>
1511 class MarkCompact::RefsUpdateVisitor {
1512 public:
1513   explicit RefsUpdateVisitor(MarkCompact* collector,
1514 mirror::Object* obj,
1515 uint8_t* begin,
1516 uint8_t* end)
1517 : collector_(collector),
1518 moving_space_begin_(collector->moving_space_begin_),
1519 moving_space_end_(collector->moving_space_end_),
1520 obj_(obj),
1521 begin_(begin),
1522 end_(end) {
1523 DCHECK(!kCheckBegin || begin != nullptr);
1524 DCHECK(!kCheckEnd || end != nullptr);
1525 }
1526
1527   void operator()([[maybe_unused]] mirror::Object* old,
1528 MemberOffset offset,
1529 [[maybe_unused]] bool is_static) const ALWAYS_INLINE
1530 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1531 bool update = true;
1532 if (kCheckBegin || kCheckEnd) {
1533 uint8_t* ref = reinterpret_cast<uint8_t*>(obj_) + offset.Int32Value();
1534 update = (!kCheckBegin || ref >= begin_) && (!kCheckEnd || ref < end_);
1535 }
1536 if (update) {
1537 collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1538 }
1539 }
1540
1541 // For object arrays we don't need to check boundaries here as it's done in
1542   // VisitReferences().
1543 // TODO: Optimize reference updating using SIMD instructions. Object arrays
1544 // are perfect as all references are tightly packed.
1545   void operator()([[maybe_unused]] mirror::Object* old,
1546 MemberOffset offset,
1547 [[maybe_unused]] bool is_static,
1548 [[maybe_unused]] bool is_obj_array) const ALWAYS_INLINE
1549 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1550 collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1551 }
1552
1553   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
1554 ALWAYS_INLINE
1555 REQUIRES_SHARED(Locks::mutator_lock_) {
1556 if (!root->IsNull()) {
1557 VisitRoot(root);
1558 }
1559 }
1560
1561   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
1562 ALWAYS_INLINE
1563 REQUIRES_SHARED(Locks::mutator_lock_) {
1564 collector_->UpdateRoot(root, moving_space_begin_, moving_space_end_);
1565 }
1566
1567 private:
1568 MarkCompact* const collector_;
1569 uint8_t* const moving_space_begin_;
1570 uint8_t* const moving_space_end_;
1571 mirror::Object* const obj_;
1572 uint8_t* const begin_;
1573 uint8_t* const end_;
1574 };
1575
1576 bool MarkCompact::IsValidObject(mirror::Object* obj) const {
1577 mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
1578 if (!heap_->GetVerification()->IsValidHeapObjectAddress(klass)) {
1579 return false;
1580 }
1581 return heap_->GetVerification()->IsValidClassUnchecked<kWithFromSpaceBarrier>(
1582 obj->GetClass<kVerifyNone, kWithFromSpaceBarrier>());
1583 }
1584
1585 template <typename Callback>
1586 void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const {
1587 if (kIsDebugBuild) {
1588 mirror::Class* klass = ref->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1589 mirror::Class* pre_compact_klass = ref->GetClass<kVerifyNone, kWithoutReadBarrier>();
1590 mirror::Class* klass_klass = klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1591 mirror::Class* klass_klass_klass = klass_klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1592 if (HasAddress(pre_compact_klass) &&
1593 reinterpret_cast<uint8_t*>(pre_compact_klass) < black_allocations_begin_) {
1594 CHECK(moving_space_bitmap_->Test(pre_compact_klass))
1595 << "ref=" << ref
1596 << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1597 << " pre_compact_klass=" << pre_compact_klass
1598 << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1599 CHECK(live_words_bitmap_->Test(pre_compact_klass));
1600 }
1601 if (!IsValidObject(ref)) {
1602 std::ostringstream oss;
1603 oss << "Invalid object: "
1604 << "ref=" << ref
1605 << " klass=" << klass
1606 << " klass_klass=" << klass_klass
1607 << " klass_klass_klass=" << klass_klass_klass
1608 << " pre_compact_klass=" << pre_compact_klass
1609 << " from_space_begin=" << static_cast<void*>(from_space_begin_)
1610 << " pre_compact_begin=" << static_cast<void*>(bump_pointer_space_->Begin())
1611 << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1612 << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1613
1614 // Call callback before dumping larger data like RAM and space dumps.
1615 callback(oss);
1616
1617 oss << " \nobject="
1618 << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(ref), 128)
1619 << " \nklass(from)="
1620 << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(klass), 128)
1621 << "spaces:\n";
1622 heap_->DumpSpaces(oss);
1623 LOG(FATAL) << oss.str();
1624 }
1625 }
1626 }
1627
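// Roughly: compacts one to-space page. Copies into 'addr' all the live strides that the
// live-words bitmap reports for this page (starting at live-words offset 'offset', whose
// first object is 'obj'), then updates the references inside the copied objects to their
// post-compaction addresses. Boundary checks are applied only to objects that may
// straddle the page's begin or end. If 'needs_memset_zero' is set, the unused tail of the
// last page is zeroed, since the kernel maps at page granularity.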
1628 void MarkCompact::CompactPage(mirror::Object* obj,
1629 uint32_t offset,
1630 uint8_t* addr,
1631 bool needs_memset_zero) {
1632 DCHECK(moving_space_bitmap_->Test(obj)
1633 && live_words_bitmap_->Test(obj));
1634 DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj
1635 << " offset=" << offset
1636 << " addr=" << static_cast<void*>(addr)
1637 << " black_allocs_begin="
1638 << static_cast<void*>(black_allocations_begin_)
1639 << " post_compact_addr="
1640 << static_cast<void*>(post_compact_end_);
1641 uint8_t* const start_addr = addr;
1642   // Number of distinct live strides copied so far.
1643 size_t stride_count = 0;
1644 uint8_t* last_stride = addr;
1645 uint32_t last_stride_begin = 0;
1646 auto verify_obj_callback = [&] (std::ostream& os) {
1647 os << " stride_count=" << stride_count
1648 << " last_stride=" << static_cast<void*>(last_stride)
1649 << " offset=" << offset
1650 << " start_addr=" << static_cast<void*>(start_addr);
1651 };
1652 obj = GetFromSpaceAddr(obj);
1653 live_words_bitmap_->VisitLiveStrides(
1654 offset,
1655 black_allocations_begin_,
1656 gPageSize,
1657 [&addr, &last_stride, &stride_count, &last_stride_begin, verify_obj_callback, this](
1658 uint32_t stride_begin, size_t stride_size, [[maybe_unused]] bool is_last)
1659 REQUIRES_SHARED(Locks::mutator_lock_) {
1660 const size_t stride_in_bytes = stride_size * kAlignment;
1661 DCHECK_LE(stride_in_bytes, gPageSize);
1662 last_stride_begin = stride_begin;
1663 DCHECK(IsAligned<kAlignment>(addr));
1664 memcpy(addr, from_space_begin_ + stride_begin * kAlignment, stride_in_bytes);
1665 if (kIsDebugBuild) {
1666 uint8_t* space_begin = bump_pointer_space_->Begin();
1667 // We can interpret the first word of the stride as an
1668             // object only from the second stride onwards, as the first
1669             // stride's first object may have started on the previous
1670 // page. The only exception is the first page of the
1671 // moving space.
1672 if (stride_count > 0 || stride_begin * kAlignment < gPageSize) {
1673 mirror::Object* o =
1674 reinterpret_cast<mirror::Object*>(space_begin + stride_begin * kAlignment);
1675 CHECK(live_words_bitmap_->Test(o)) << "ref=" << o;
1676 CHECK(moving_space_bitmap_->Test(o))
1677 << "ref=" << o << " bitmap: " << moving_space_bitmap_->DumpMemAround(o);
1678 VerifyObject(reinterpret_cast<mirror::Object*>(addr), verify_obj_callback);
1679 }
1680 }
1681 last_stride = addr;
1682 addr += stride_in_bytes;
1683 stride_count++;
1684 });
1685 DCHECK_LT(last_stride, start_addr + gPageSize);
1686 DCHECK_GT(stride_count, 0u);
1687 size_t obj_size = 0;
1688 uint32_t offset_within_obj = offset * kAlignment
1689 - (reinterpret_cast<uint8_t*>(obj) - from_space_begin_);
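  // offset_within_obj is the number of bytes of the page's first object that lie before
  // this page's start. For example, if the first object begins 0x30 bytes before the
  // page, offset_within_obj == 0x30 and only its references at offsets >= 0x30 (i.e., on
  // this page) are updated here; the earlier ones are handled when the preceding page is
  // compacted (compaction proceeds in reverse address order).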
1690 // First object
1691 if (offset_within_obj > 0) {
1692 mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj);
1693 if (stride_count > 1) {
1694 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1695 to_ref,
1696 start_addr,
1697 nullptr);
1698 obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1699 visitor, MemberOffset(offset_within_obj), MemberOffset(-1));
1700 } else {
1701 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1702 to_ref,
1703 start_addr,
1704 start_addr + gPageSize);
1705 obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1706 visitor, MemberOffset(offset_within_obj), MemberOffset(offset_within_obj
1707 + gPageSize));
1708 }
1709 obj_size = RoundUp(obj_size, kAlignment);
1710 DCHECK_GT(obj_size, offset_within_obj)
1711 << "obj:" << obj
1712 << " class:"
1713 << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1714 << " to_addr:" << to_ref
1715 << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1716 << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1717 << " offset:" << offset * kAlignment
1718 << " class-after-obj-iter:"
1719 << (class_after_obj_iter_ != class_after_obj_ordered_map_.rend() ?
1720 class_after_obj_iter_->first.AsMirrorPtr() : nullptr)
1721 << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1722 << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1723 << " offset-of-last-idx:"
1724 << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1725 << " first-obj-of-last-idx:"
1726 << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1727
1728 obj_size -= offset_within_obj;
1729 // If there is only one stride, then adjust last_stride_begin to the
1730 // end of the first object.
1731 if (stride_count == 1) {
1732 last_stride_begin += obj_size / kAlignment;
1733 }
1734 }
1735
1736 // Except for the last page being compacted, the pages will have addr ==
1737 // start_addr + gPageSize.
1738 uint8_t* const end_addr = addr;
1739 addr = start_addr;
1740 size_t bytes_done = obj_size;
1741 // All strides except the last one can be updated without any boundary
1742 // checks.
1743 DCHECK_LE(addr, last_stride);
1744 size_t bytes_to_visit = last_stride - addr;
1745 DCHECK_LE(bytes_to_visit, gPageSize);
1746 while (bytes_to_visit > bytes_done) {
1747 mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1748 VerifyObject(ref, verify_obj_callback);
1749 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1750 visitor(this, ref, nullptr, nullptr);
1751 obj_size = ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1752 obj_size = RoundUp(obj_size, kAlignment);
1753 bytes_done += obj_size;
1754 }
1755   // The last stride may have multiple objects in it and we don't know where the
1756   // last object, which crosses the page boundary, starts; therefore check the
1757   // page-end in all of these objects. Also, we need to call
1758   // VisitRefsForCompaction() with the from-space object as we fetch the object size,
1759   // which in the case of a klass requires 'class_size_'.
1760 uint8_t* from_addr = from_space_begin_ + last_stride_begin * kAlignment;
1761 bytes_to_visit = end_addr - addr;
1762 DCHECK_LE(bytes_to_visit, gPageSize);
1763 while (bytes_to_visit > bytes_done) {
1764 mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1765 obj = reinterpret_cast<mirror::Object*>(from_addr);
1766 VerifyObject(ref, verify_obj_callback);
1767 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
1768 visitor(this, ref, nullptr, start_addr + gPageSize);
1769 obj_size = obj->VisitRefsForCompaction(visitor,
1770 MemberOffset(0),
1771 MemberOffset(end_addr - (addr + bytes_done)));
1772 obj_size = RoundUp(obj_size, kAlignment);
1773 DCHECK_GT(obj_size, 0u)
1774 << "from_addr:" << obj
1775 << " from-space-class:"
1776 << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1777 << " to_addr:" << ref
1778 << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1779 << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1780 << " offset:" << offset * kAlignment
1781 << " bytes_done:" << bytes_done
1782 << " class-after-obj-iter:"
1783 << (class_after_obj_iter_ != class_after_obj_ordered_map_.rend() ?
1784 class_after_obj_iter_->first.AsMirrorPtr() : nullptr)
1785 << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1786 << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1787 << " offset-of-last-idx:"
1788 << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1789 << " first-obj-of-last-idx:"
1790 << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1791
1792 from_addr += obj_size;
1793 bytes_done += obj_size;
1794 }
1795 // The last page that we compact may have some bytes left untouched in the
1796 // end, we should zero them as the kernel copies at page granularity.
1797 if (needs_memset_zero && UNLIKELY(bytes_done < gPageSize)) {
1798 std::memset(addr + bytes_done, 0x0, gPageSize - bytes_done);
1799 }
1800 }
1801
1802 // We store the starting point (pre_compact_page - first_obj) and first-chunk's
1803 // size. If more TLAB(s) started in this page, then those chunks are identified
1804 // using mark bitmap. All this info is prepared in UpdateMovingSpaceBlackAllocations().
1805 // If we find a set bit in the bitmap, then we copy the remaining page and then
1806 // use the bitmap to visit each object for updating references.
1807 void MarkCompact::SlideBlackPage(mirror::Object* first_obj,
1808 mirror::Object* next_page_first_obj,
1809 uint32_t first_chunk_size,
1810 uint8_t* const pre_compact_page,
1811 uint8_t* dest,
1812 bool needs_memset_zero) {
1813 DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
1814 size_t bytes_copied;
1815 uint8_t* src_addr = reinterpret_cast<uint8_t*>(GetFromSpaceAddr(first_obj));
1816 uint8_t* pre_compact_addr = reinterpret_cast<uint8_t*>(first_obj);
1817 uint8_t* const pre_compact_page_end = pre_compact_page + gPageSize;
1818 uint8_t* const dest_page_end = dest + gPageSize;
1819
1820 auto verify_obj_callback = [&] (std::ostream& os) {
1821 os << " first_obj=" << first_obj
1822 << " next_page_first_obj=" << next_page_first_obj
1823        << " first_chunk_size=" << first_chunk_size
1824 << " dest=" << static_cast<void*>(dest)
1825 << " pre_compact_page="
1826 << static_cast<void* const>(pre_compact_page);
1827 };
1828 // We have empty portion at the beginning of the page. Zero it.
1829 if (pre_compact_addr > pre_compact_page) {
1830 bytes_copied = pre_compact_addr - pre_compact_page;
1831 DCHECK_LT(bytes_copied, gPageSize);
1832 if (needs_memset_zero) {
1833 std::memset(dest, 0x0, bytes_copied);
1834 }
1835 dest += bytes_copied;
1836 } else {
1837 bytes_copied = 0;
1838 size_t offset = pre_compact_page - pre_compact_addr;
1839 pre_compact_addr = pre_compact_page;
1840 src_addr += offset;
1841 DCHECK(IsAlignedParam(src_addr, gPageSize));
1842 }
1843 // Copy the first chunk of live words
1844 std::memcpy(dest, src_addr, first_chunk_size);
1845 // Update references in the first chunk. Use object size to find next object.
1846 {
1847 size_t bytes_to_visit = first_chunk_size;
1848 size_t obj_size;
1849 // The first object started in some previous page. So we need to check the
1850 // beginning.
1851 DCHECK_LE(reinterpret_cast<uint8_t*>(first_obj), pre_compact_addr);
1852 size_t offset = pre_compact_addr - reinterpret_cast<uint8_t*>(first_obj);
1853 if (bytes_copied == 0 && offset > 0) {
1854 mirror::Object* to_obj = reinterpret_cast<mirror::Object*>(dest - offset);
1855 mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(src_addr - offset);
1856 // If the next page's first-obj is in this page or nullptr, then we don't
1857     // need to check the end boundary.
1858 if (next_page_first_obj == nullptr
1859 || (first_obj != next_page_first_obj
1860 && reinterpret_cast<uint8_t*>(next_page_first_obj) <= pre_compact_page_end)) {
1861 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1862 to_obj,
1863 dest,
1864 nullptr);
1865 obj_size = from_obj->VisitRefsForCompaction<
1866 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1867 MemberOffset(offset),
1868 MemberOffset(-1));
1869 } else {
1870 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1871 to_obj,
1872 dest,
1873 dest_page_end);
1874 obj_size = from_obj->VisitRefsForCompaction<
1875 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1876 MemberOffset(offset),
1877 MemberOffset(offset
1878 + gPageSize));
1879 if (first_obj == next_page_first_obj) {
1880 // First object is the only object on this page. So there's nothing else left to do.
1881 return;
1882 }
1883 }
1884 obj_size = RoundUp(obj_size, kAlignment);
1885 obj_size -= offset;
1886 dest += obj_size;
1887 bytes_to_visit -= obj_size;
1888 }
1889 bytes_copied += first_chunk_size;
1890 // If the last object in this page is next_page_first_obj, then we need to check end boundary
1891 bool check_last_obj = false;
1892 if (next_page_first_obj != nullptr
1893 && reinterpret_cast<uint8_t*>(next_page_first_obj) < pre_compact_page_end
1894 && bytes_copied == gPageSize) {
1895 size_t diff = pre_compact_page_end - reinterpret_cast<uint8_t*>(next_page_first_obj);
1896 DCHECK_LE(diff, gPageSize);
1897 DCHECK_LE(diff, bytes_to_visit);
1898 bytes_to_visit -= diff;
1899 check_last_obj = true;
1900 }
1901 while (bytes_to_visit > 0) {
1902 mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1903 VerifyObject(dest_obj, verify_obj_callback);
1904 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(this,
1905 dest_obj,
1906 nullptr,
1907 nullptr);
1908 obj_size = dest_obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1909 obj_size = RoundUp(obj_size, kAlignment);
1910 bytes_to_visit -= obj_size;
1911 dest += obj_size;
1912 }
1913 DCHECK_EQ(bytes_to_visit, 0u);
1914 if (check_last_obj) {
1915 mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1916 VerifyObject(dest_obj, verify_obj_callback);
1917 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
1918 dest_obj,
1919 nullptr,
1920 dest_page_end);
1921 mirror::Object* obj = GetFromSpaceAddr(next_page_first_obj);
1922 obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1923 MemberOffset(0),
1924 MemberOffset(dest_page_end - dest));
1925 return;
1926 }
1927 }
1928
1929 // Probably a TLAB finished on this page and/or a new TLAB started as well.
1930 if (bytes_copied < gPageSize) {
1931 src_addr += first_chunk_size;
1932 pre_compact_addr += first_chunk_size;
1933 // Use mark-bitmap to identify where objects are. First call
1934 // VisitMarkedRange for only the first marked bit. If found, zero all bytes
1935 // until that object and then call memcpy on the rest of the page.
1936 // Then call VisitMarkedRange for all marked bits *after* the one found in
1937 // this invocation. This time to visit references.
1938 uintptr_t start_visit = reinterpret_cast<uintptr_t>(pre_compact_addr);
1939 uintptr_t page_end = reinterpret_cast<uintptr_t>(pre_compact_page_end);
1940 mirror::Object* found_obj = nullptr;
1941 moving_space_bitmap_->VisitMarkedRange</*kVisitOnce*/true>(start_visit,
1942 page_end,
1943 [&found_obj](mirror::Object* obj) {
1944 found_obj = obj;
1945 });
1946 size_t remaining_bytes = gPageSize - bytes_copied;
1947 if (found_obj == nullptr) {
1948 if (needs_memset_zero) {
1949 // No more black objects in this page. Zero the remaining bytes and return.
1950 std::memset(dest, 0x0, remaining_bytes);
1951 }
1952 return;
1953 }
1954 // Copy everything in this page, which includes any zeroed regions
1955 // in-between.
1956 std::memcpy(dest, src_addr, remaining_bytes);
1957 DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1958 moving_space_bitmap_->VisitMarkedRange(
1959 reinterpret_cast<uintptr_t>(found_obj) + mirror::kObjectHeaderSize,
1960 page_end,
1961 [&found_obj, pre_compact_addr, dest, this, verify_obj_callback] (mirror::Object* obj)
1962 REQUIRES_SHARED(Locks::mutator_lock_) {
1963 ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1964 mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
1965 VerifyObject(ref, verify_obj_callback);
1966 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1967 visitor(this, ref, nullptr, nullptr);
1968 ref->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1969 MemberOffset(0),
1970 MemberOffset(-1));
1971 // Remember for next round.
1972 found_obj = obj;
1973 });
1974 // found_obj may have been updated in VisitMarkedRange. Visit the last found
1975 // object.
1976 DCHECK_GT(reinterpret_cast<uint8_t*>(found_obj), pre_compact_addr);
1977 DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1978 ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1979 mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest + diff);
1980 VerifyObject(dest_obj, verify_obj_callback);
1981 RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true> visitor(
1982 this, dest_obj, nullptr, dest_page_end);
1983 // Last object could overlap with next page. And if it happens to be a
1984 // class, then we may access something (like static-fields' offsets) which
1985 // is on the next page. Therefore, use from-space's reference.
1986 mirror::Object* obj = GetFromSpaceAddr(found_obj);
1987 obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
1988 visitor, MemberOffset(0), MemberOffset(page_end - reinterpret_cast<uintptr_t>(found_obj)));
1989 }
1990 }
1991
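// Minor-fault mode only: maps, with a single UFFDIO_CONTINUE ioctl, the longest run of
// contiguous pages starting at 'to_space_start' whose state could be moved from
// kProcessed to kProcessedAndMapping. The pages are expected to already be populated
// through the shadow mapping; CONTINUE installs those existing pages at the faulting
// (to-space) addresses without copying.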
1992 template <bool kFirstPageMapping>
1993 void MarkCompact::MapProcessedPages(uint8_t* to_space_start,
1994 Atomic<PageState>* state_arr,
1995 size_t arr_idx,
1996 size_t arr_len) {
1997 CHECK(minor_fault_initialized_);
1998 DCHECK_LT(arr_idx, arr_len);
1999 DCHECK_ALIGNED_PARAM(to_space_start, gPageSize);
2000 // Claim all the contiguous pages, which are ready to be mapped, and then do
2001   // so in a single ioctl. This helps avoid the overhead of invoking the syscall
2002 // several times and also maps the already-processed pages, avoiding
2003 // unnecessary faults on them.
2004 size_t length = kFirstPageMapping ? gPageSize : 0;
2005 if (kFirstPageMapping) {
2006 arr_idx++;
2007 }
2008   // We need to guarantee that we don't end up successfully marking a later
2009   // page 'mapping' and then fail to mark an earlier page. To guarantee that,
2010   // we use acq_rel order.
2011 for (; arr_idx < arr_len; arr_idx++, length += gPageSize) {
2012 PageState expected_state = PageState::kProcessed;
2013 if (!state_arr[arr_idx].compare_exchange_strong(
2014 expected_state, PageState::kProcessedAndMapping, std::memory_order_acq_rel)) {
2015 break;
2016 }
2017 }
2018 if (length > 0) {
2019 // Note: We need the first page to be attempted (to be mapped) by the ioctl
2020 // as this function is called due to some mutator thread waiting on the
2021 // 'to_space_start' page. Therefore, the ioctl must always be called
2022 // with 'to_space_start' as the 'start' address because it can bail out in
2023 // the middle (not attempting to map the subsequent pages) if it finds any
2024 // page either already mapped in between, or missing on the shadow-map.
2025 struct uffdio_continue uffd_continue;
2026 uffd_continue.range.start = reinterpret_cast<uintptr_t>(to_space_start);
2027 uffd_continue.range.len = length;
2028 uffd_continue.mode = 0;
2029 int ret = ioctl(uffd_, UFFDIO_CONTINUE, &uffd_continue);
2030 if (UNLIKELY(ret == -1 && errno == EAGAIN)) {
2031 // This can happen only in linear-alloc.
2032 DCHECK(linear_alloc_spaces_data_.end() !=
2033 std::find_if(linear_alloc_spaces_data_.begin(),
2034 linear_alloc_spaces_data_.end(),
2035 [to_space_start](const LinearAllocSpaceData& data) {
2036 return data.begin_ <= to_space_start && to_space_start < data.end_;
2037 }));
2038
2039 // This could happen if userfaultfd couldn't find any pages mapped in the
2040 // shadow map. For instance, if there are certain (contiguous) pages on
2041 // linear-alloc which are allocated and have first-object set-up but have
2042 // not been accessed yet.
2043 // Bail out by setting the remaining pages' state back to kProcessed and
2044 // then waking up any waiting threads.
2045 DCHECK_GE(uffd_continue.mapped, 0);
2046 DCHECK_ALIGNED_PARAM(uffd_continue.mapped, gPageSize);
2047 DCHECK_LT(uffd_continue.mapped, static_cast<ssize_t>(length));
2048 if (kFirstPageMapping) {
2049 // In this case the first page must be mapped.
2050 DCHECK_GE(uffd_continue.mapped, static_cast<ssize_t>(gPageSize));
2051 }
2052 // Nobody would modify these pages' state simultaneously so only atomic
2053 // store is sufficient. Use 'release' order to ensure that all states are
2054 // modified sequentially.
2055 for (size_t remaining_len = length - uffd_continue.mapped; remaining_len > 0;
2056 remaining_len -= gPageSize) {
2057 arr_idx--;
2058 DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed),
2059 PageState::kProcessedAndMapping);
2060 state_arr[arr_idx].store(PageState::kProcessed, std::memory_order_release);
2061 }
2062 uffd_continue.range.start =
2063 reinterpret_cast<uintptr_t>(to_space_start) + uffd_continue.mapped;
2064 uffd_continue.range.len = length - uffd_continue.mapped;
2065 ret = ioctl(uffd_, UFFDIO_WAKE, &uffd_continue.range);
2066 CHECK_EQ(ret, 0) << "ioctl_userfaultfd: wake failed: " << strerror(errno);
2067 } else {
2068 // We may receive ENOENT if gc-thread unregisters the
2069 // range behind our back, which is fine because that
2070 // happens only when it knows compaction is done.
2071 CHECK(ret == 0 || !kFirstPageMapping || errno == ENOENT)
2072 << "ioctl_userfaultfd: continue failed: " << strerror(errno);
2073 if (ret == 0) {
2074 DCHECK_EQ(uffd_continue.mapped, static_cast<ssize_t>(length));
2075 }
2076 }
2077 if (use_uffd_sigbus_) {
2078 // Nobody else would modify these pages' state simultaneously so atomic
2079 // store is sufficient.
2080 for (; uffd_continue.mapped > 0; uffd_continue.mapped -= gPageSize) {
2081 arr_idx--;
2082 DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed),
2083 PageState::kProcessedAndMapping);
2084 state_arr[arr_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
2085 }
2086 }
2087 }
2088 }
2089
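// For example, with the parameters used at the call sites below (kYieldMax=3,
// kSleepUs=1000), attempts 0-3 simply yield the CPU, attempt 4 sleeps for 1ms,
// attempt 5 for 2ms, and so on, growing linearly with the attempt count.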
2090 template <uint32_t kYieldMax = 5, uint64_t kSleepUs = 10>
2091 static void BackOff(uint32_t i) {
2092 // TODO: Consider adding x86 PAUSE and/or ARM YIELD here.
2093 if (i <= kYieldMax) {
2094 sched_yield();
2095 } else {
2096 // nanosleep is not in the async-signal-safe list, but bionic implements it
2097 // with a pure system call, so it should be fine.
2098 NanoSleep(kSleepUs * 1000 * (i - kYieldMax));
2099 }
2100 }
2101
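// Installs zero-pages over [addr, addr + length) with UFFDIO_ZEROPAGE and returns the
// number of bytes actually mapped. If gUffdSupportsMmapTrylock, the ioctl is first
// attempted in mmap-trylock mode; mmap_lock contention then surfaces as EAGAIN with a
// negative 'zeropage' value, in which case we back off (scaled by the thread's native
// priority) and retry, eventually clearing the trylock mode to block instead.
// EEXIST/ENOENT are tolerated as per the corresponding parameters.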
2102 size_t MarkCompact::ZeropageIoctl(void* addr,
2103 size_t length,
2104 bool tolerate_eexist,
2105 bool tolerate_enoent) {
2106 int32_t backoff_count = -1;
2107 int32_t max_backoff = 10; // max native priority.
2108 struct uffdio_zeropage uffd_zeropage;
2109 DCHECK(IsAlignedParam(addr, gPageSize));
2110 uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr);
2111 uffd_zeropage.range.len = length;
2112 uffd_zeropage.mode = gUffdSupportsMmapTrylock ? UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK : 0;
2113 while (true) {
2114 uffd_zeropage.zeropage = 0;
2115 int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage);
2116 if (ret == 0) {
2117 DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(length));
2118 return length;
2119 } else if (errno == EAGAIN) {
2120 if (uffd_zeropage.zeropage > 0) {
2121 // Contention was observed after acquiring mmap_lock. But the first page
2122 // is already done, which is what we care about.
2123 DCHECK(IsAlignedParam(uffd_zeropage.zeropage, gPageSize));
2124 DCHECK_GE(uffd_zeropage.zeropage, static_cast<ssize_t>(gPageSize));
2125 return uffd_zeropage.zeropage;
2126 } else if (uffd_zeropage.zeropage < 0) {
2127 // mmap_read_trylock() failed due to contention. Back-off and retry.
2128 DCHECK_EQ(uffd_zeropage.zeropage, -EAGAIN);
2129 if (backoff_count == -1) {
2130 int prio = Thread::Current()->GetNativePriority();
2131 DCHECK(prio > 0 && prio <= 10) << prio;
2132 max_backoff -= prio;
2133 backoff_count = 0;
2134 }
2135 if (backoff_count < max_backoff) {
2136 // Using 3 to align 'normal' priority threads with sleep.
2137 BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
2138 } else {
2139 uffd_zeropage.mode = 0;
2140 }
2141 }
2142 } else if (tolerate_eexist && errno == EEXIST) {
2143 // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
2144 // wouldn't be included in it.
2145 return uffd_zeropage.zeropage > 0 ? uffd_zeropage.zeropage + gPageSize : gPageSize;
2146 } else {
2147 CHECK(tolerate_enoent && errno == ENOENT)
2148 << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr;
2149 return 0;
2150 }
2151 }
2152 }
2153
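// Copies 'length' bytes from 'buffer' into the userfaultfd-registered range at 'dst'
// with UFFDIO_COPY and returns the number of bytes actually mapped. EAGAIN (mmap_lock
// contention in trylock mode) is retried with back-off unless 'return_on_contention' is
// set; on EEXIST the already-mapped page is counted in the returned length.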
2154 size_t MarkCompact::CopyIoctl(void* dst, void* buffer, size_t length, bool return_on_contention) {
2155 int32_t backoff_count = -1;
2156 int32_t max_backoff = 10; // max native priority.
2157 struct uffdio_copy uffd_copy;
2158 uffd_copy.mode = gUffdSupportsMmapTrylock ? UFFDIO_COPY_MODE_MMAP_TRYLOCK : 0;
2159 uffd_copy.src = reinterpret_cast<uintptr_t>(buffer);
2160 uffd_copy.dst = reinterpret_cast<uintptr_t>(dst);
2161 uffd_copy.len = length;
2162 uffd_copy.copy = 0;
2163 while (true) {
2164 int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy);
2165 if (ret == 0) {
2166 DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(length));
2167 break;
2168 } else if (errno == EAGAIN) {
2169 // Contention observed.
2170 DCHECK_NE(uffd_copy.copy, 0);
2171 if (uffd_copy.copy > 0) {
2172 // Contention was observed after acquiring mmap_lock.
2173 DCHECK(IsAlignedParam(uffd_copy.copy, gPageSize));
2174 DCHECK_GE(uffd_copy.copy, static_cast<ssize_t>(gPageSize));
2175 break;
2176 } else {
2177 // mmap_read_trylock() failed due to contention.
2178 DCHECK_EQ(uffd_copy.copy, -EAGAIN);
2179 uffd_copy.copy = 0;
2180 if (return_on_contention) {
2181 break;
2182 }
2183 }
2184 if (backoff_count == -1) {
2185 int prio = Thread::Current()->GetNativePriority();
2186 DCHECK(prio > 0 && prio <= 10) << prio;
2187 max_backoff -= prio;
2188 backoff_count = 0;
2189 }
2190 if (backoff_count < max_backoff) {
2191 // Using 3 to align 'normal' priority threads with sleep.
2192 BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
2193 } else {
2194 uffd_copy.mode = 0;
2195 }
2196 } else if (errno == EEXIST) {
2197 DCHECK_NE(uffd_copy.copy, 0);
2198 if (uffd_copy.copy < 0) {
2199 uffd_copy.copy = 0;
2200 }
2201 // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
2202 // wouldn't be included in it.
2203 uffd_copy.copy += gPageSize;
2204 break;
2205 } else {
2206 DCHECK_EQ(uffd_copy.copy, -errno);
2207 LOG(FATAL) << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer
2208 << " dst:" << dst;
2209 }
2210 }
2211 return uffd_copy.copy;
2212 }
2213
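// Roughly: tries to claim to-space page 'page_idx' by CAS-ing its status from
// kUnprocessed to kProcessing(AndMapping). On success it runs 'func' to compact the page
// into 'page' and then, in copy-mode, either maps it immediately with UFFDIO_COPY or
// publishes it as kProcessed (with the src page's from-space offset packed into the
// status word) for later batch mapping. Returns true if this thread performed the
// compaction, false if some other thread had already claimed the page.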
2214 template <int kMode, typename CompactionFn>
2215 bool MarkCompact::DoPageCompactionWithStateChange(size_t page_idx,
2216 uint8_t* to_space_page,
2217 uint8_t* page,
2218 bool map_immediately,
2219 CompactionFn func) {
2220 uint32_t expected_state = static_cast<uint8_t>(PageState::kUnprocessed);
2221 uint32_t desired_state = static_cast<uint8_t>(map_immediately ? PageState::kProcessingAndMapping :
2222 PageState::kProcessing);
2223 // In the concurrent case (kMode != kFallbackMode) we need to ensure that the update
2224   // to moving_pages_status_[page_idx] is released before the contents of the page are
2225 // made accessible to other threads.
2226 //
2227 // We need acquire ordering here to ensure that when the CAS fails, another thread
2228 // has completed processing the page, which is guaranteed by the release below.
2229 if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong(
2230 expected_state, desired_state, std::memory_order_acquire)) {
2231 func();
2232 if (kMode == kCopyMode) {
2233 if (map_immediately) {
2234 CopyIoctl(to_space_page, page, gPageSize, /*return_on_contention=*/false);
2235 // Store is sufficient as no other thread could modify the status at this
2236 // point. Relaxed order is sufficient as the ioctl will act as a fence.
2237 moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2238 std::memory_order_relaxed);
2239 } else {
2240         // Pack the src page's from-space offset into the status word.
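        // The status word packs two things: the low bits covered by kPageStateMask hold
        // the PageState, while the remaining bits hold the page's byte offset within the
        // from-space mapping (page-aligned, so its low bits are zero).
        // MapMovingSpacePages() later recovers the offset with '& ~kPageStateMask' and
        // batches contiguous offsets into a single ioctl.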
2241 DCHECK(from_space_map_.HasAddress(page));
2242 DCHECK_LE(static_cast<size_t>(page - from_space_begin_),
2243 std::numeric_limits<uint32_t>::max());
2244 uint32_t store_val = page - from_space_begin_;
2245 DCHECK_EQ(store_val & kPageStateMask, 0u);
2246 store_val |= static_cast<uint8_t>(PageState::kProcessed);
2247 // Store is sufficient as no other thread would modify the status at this point.
2248 moving_pages_status_[page_idx].store(store_val, std::memory_order_release);
2249 }
2250 } else if (kMode == kMinorFaultMode) {
2251 expected_state = static_cast<uint8_t>(PageState::kProcessing);
2252 desired_state = static_cast<uint8_t>(PageState::kProcessed);
2253       // The CAS needs to be done with release order to ensure that stores to the
2254       // page make it to memory *before* other threads observe that it's
2255       // ready to be mapped.
2256 if (!moving_pages_status_[page_idx].compare_exchange_strong(
2257 expected_state, desired_state, std::memory_order_release)) {
2258 // Some mutator has requested to map the page after processing it.
2259 DCHECK_EQ(expected_state, static_cast<uint8_t>(PageState::kProcessingAndMapping));
2260 }
2261 UNREACHABLE();
2262 }
2263 return true;
2264 } else {
2265 // Only GC thread could have set the state to Processed.
2266 DCHECK_NE(expected_state, static_cast<uint8_t>(PageState::kProcessed));
2267 return false;
2268 }
2269 }
2270
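// Roughly: frees (madvises) the from-space pages that are no longer needed because the
// corresponding to-space pages have been compacted, and opportunistically maps compacted
// to-space pages in [cur_page_idx, end_idx_for_mapping). Returns whether all those pages
// are known to be mapped, so that the caller can advance end_idx_for_mapping.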
2271 bool MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_idx_for_mapping) {
2272   // Thanks to sliding compaction, bump-pointer allocations, and reverse
2273   // compaction (see CompactMovingSpace), the logic here is pretty simple: find
2274   // the to-space page up to which compaction has finished; all the from-space
2275   // pages corresponding to that page onwards can be freed. There are some corner
2276   // cases to be taken care of, which are described below.
2277 size_t idx = last_checked_reclaim_page_idx_;
2278 // Find the to-space page up to which the corresponding from-space pages can be
2279 // freed.
2280 for (; idx > cur_page_idx; idx--) {
2281 PageState state = GetMovingPageState(idx - 1);
2282 if (state == PageState::kMutatorProcessing) {
2283 // Some mutator is working on the page.
2284 break;
2285 }
2286 DCHECK(state >= PageState::kProcessed ||
2287 (state == PageState::kUnprocessed &&
2288 (mode == kFallbackMode || idx > moving_first_objs_count_)));
2289 }
2290 DCHECK_LE(idx, last_checked_reclaim_page_idx_);
2291 if (idx == last_checked_reclaim_page_idx_) {
2292 // Nothing to do.
2293 return false;
2294 }
2295
2296 uint8_t* reclaim_begin;
2297 uint8_t* idx_addr;
2298 // Calculate the first from-space page to be freed using 'idx'. If the
2299 // first-object of the idx'th to-space page started before the corresponding
2300 // from-space page, which is almost always the case in the compaction portion
2301 // of the moving-space, then it indicates that the subsequent pages that are
2302 // yet to be compacted will need the from-space pages. Therefore, find the page
2303 // (from the already compacted pages) whose first-object is different from
2304 // ours. All the from-space pages starting from that one are safe to be
2305 // removed. Please note that this iteration is not expected to be long in
2306 // normal cases as objects are smaller than page size.
2307 if (idx >= moving_first_objs_count_) {
2308 // black-allocated portion of the moving-space
2309 idx_addr = black_allocations_begin_ + (idx - moving_first_objs_count_) * gPageSize;
2310 reclaim_begin = idx_addr;
2311 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2312 if (first_obj != nullptr && reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2313 size_t idx_len = moving_first_objs_count_ + black_page_count_;
2314 for (size_t i = idx + 1; i < idx_len; i++) {
2315 mirror::Object* obj = first_objs_moving_space_[i].AsMirrorPtr();
2316 // A null first-object indicates that the corresponding to-space page is
2317 // not used yet. So we can compute its from-space page and use that.
2318 if (obj != first_obj) {
2319 reclaim_begin = obj != nullptr
2320 ? AlignUp(reinterpret_cast<uint8_t*>(obj), gPageSize)
2321 : (black_allocations_begin_ + (i - moving_first_objs_count_) * gPageSize);
2322 break;
2323 }
2324 }
2325 }
2326 } else {
2327 DCHECK_GE(pre_compact_offset_moving_space_[idx], 0u);
2328 idx_addr = bump_pointer_space_->Begin() + pre_compact_offset_moving_space_[idx] * kAlignment;
2329 reclaim_begin = idx_addr;
2330 DCHECK_LE(reclaim_begin, black_allocations_begin_);
2331 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2332 if (reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2333 DCHECK_LT(idx, moving_first_objs_count_);
2334 mirror::Object* obj = first_obj;
2335 for (size_t i = idx + 1; i < moving_first_objs_count_; i++) {
2336 obj = first_objs_moving_space_[i].AsMirrorPtr();
2337 if (first_obj != obj) {
2338 DCHECK_LT(first_obj, obj);
2339 DCHECK_LT(reclaim_begin, reinterpret_cast<uint8_t*>(obj));
2340 reclaim_begin = reinterpret_cast<uint8_t*>(obj);
2341 break;
2342 }
2343 }
2344 if (obj == first_obj) {
2345 reclaim_begin = black_allocations_begin_;
2346 }
2347 }
2348 reclaim_begin = AlignUp(reclaim_begin, gPageSize);
2349 }
2350
2351 DCHECK_NE(reclaim_begin, nullptr);
2352 DCHECK_ALIGNED_PARAM(reclaim_begin, gPageSize);
2353 DCHECK_ALIGNED_PARAM(last_reclaimed_page_, gPageSize);
2354   // Check if the 'class_after_obj_ordered_map_' map allows pages to be freed.
2355 for (; class_after_obj_iter_ != class_after_obj_ordered_map_.rend(); class_after_obj_iter_++) {
2356 mirror::Object* klass = class_after_obj_iter_->first.AsMirrorPtr();
2357 mirror::Class* from_klass = static_cast<mirror::Class*>(GetFromSpaceAddr(klass));
2358 // Check with class' end to ensure that, if required, the entire class survives.
2359 uint8_t* klass_end = reinterpret_cast<uint8_t*>(klass) + from_klass->SizeOf<kVerifyNone>();
2360 DCHECK_LE(klass_end, last_reclaimed_page_);
2361 if (reinterpret_cast<uint8_t*>(klass_end) >= reclaim_begin) {
2362 // Found a class which is in the reclaim range.
2363 uint8_t* obj_addr = reinterpret_cast<uint8_t*>(class_after_obj_iter_->second.AsMirrorPtr());
2364 // NOTE: Don't assert that obj is of 'klass' type as klass could instead
2365 // be its super-class.
2366 if (obj_addr < idx_addr) {
2367 // Its lowest-address object is not compacted yet. Reclaim starting from
2368 // the end of this class.
2369 reclaim_begin = AlignUp(klass_end, gPageSize);
2370 } else {
2371 // Continue consuming pairs wherein the lowest address object has already
2372 // been compacted.
2373 continue;
2374 }
2375 }
2376 // All the remaining class (and thereby corresponding object) addresses are
2377 // lower than the reclaim range.
2378 break;
2379 }
2380 bool all_mapped = mode == kFallbackMode;
2381 ssize_t size = last_reclaimed_page_ - reclaim_begin;
2382 if (size > kMinFromSpaceMadviseSize) {
2383 // Map all the pages in the range.
2384 if (mode == kCopyMode && cur_page_idx < end_idx_for_mapping) {
2385 if (MapMovingSpacePages(cur_page_idx,
2386 end_idx_for_mapping,
2387 /*from_ioctl=*/false,
2388 /*return_on_contention=*/true) ==
2389 end_idx_for_mapping - cur_page_idx) {
2390 all_mapped = true;
2391 }
2392 } else {
2393       // This is for the black-allocation pages, so that madvise is not missed.
2394 all_mapped = true;
2395 }
2396 // If not all pages are mapped, then take it as a hint that mmap_lock is
2397 // contended and hence don't madvise as that also needs the same lock.
2398 if (all_mapped) {
2399 // Retain a few pages for subsequent compactions.
2400 const ssize_t gBufferPages = 4 * gPageSize;
2401 DCHECK_LT(gBufferPages, kMinFromSpaceMadviseSize);
2402 size -= gBufferPages;
2403 uint8_t* addr = last_reclaimed_page_ - size;
2404 int behavior = minor_fault_initialized_ ? MADV_REMOVE : MADV_DONTNEED;
2405 CHECK_EQ(madvise(addr + from_space_slide_diff_, size, behavior), 0)
2406 << "madvise of from-space failed: " << strerror(errno);
2407 last_reclaimed_page_ = addr;
2408 cur_reclaimable_page_ = addr;
2409 }
2410 }
2411 CHECK_LE(reclaim_begin, last_reclaimable_page_);
2412 last_reclaimable_page_ = reclaim_begin;
2413 last_checked_reclaim_page_idx_ = idx;
2414 return all_mapped;
2415 }
2416
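// Roughly: folds class_after_obj_hash_map_ (and the super-class mapping) into the
// address-ordered class_after_obj_ordered_map_, keeping for each class in the moving
// space the lowest-address object that precedes it. FreeFromSpacePages() consults this
// map so that it doesn't reclaim from-space pages containing a class whose lower-address
// objects haven't been compacted yet.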
2417 void MarkCompact::UpdateClassAfterObjMap() {
2418 CHECK(class_after_obj_ordered_map_.empty());
2419 for (const auto& pair : class_after_obj_hash_map_) {
2420 auto super_class_iter = super_class_after_class_hash_map_.find(pair.first);
2421 ObjReference key = super_class_iter != super_class_after_class_hash_map_.end()
2422 ? super_class_iter->second
2423 : pair.first;
2424 if (std::less<mirror::Object*>{}(pair.second.AsMirrorPtr(), key.AsMirrorPtr()) &&
2425 HasAddress(key.AsMirrorPtr())) {
2426 auto [ret_iter, success] = class_after_obj_ordered_map_.try_emplace(key, pair.second);
2427       // It could fail only if the class 'key' has objects of its own that are lower in
2428       // address order, as well as objects of some of its derived classes. In that case
2429       // choose the lowest-address object.
2430 if (!success &&
2431 std::less<mirror::Object*>{}(pair.second.AsMirrorPtr(), ret_iter->second.AsMirrorPtr())) {
2432 ret_iter->second = pair.second;
2433 }
2434 }
2435 }
2436 class_after_obj_hash_map_.clear();
2437 super_class_after_class_hash_map_.clear();
2438 }
2439
2440 template <int kMode>
2441 void MarkCompact::CompactMovingSpace(uint8_t* page) {
2442 // For every page we have a starting object, which may have started in some
2443 // preceding page, and an offset within that object from where we must start
2444 // copying.
2445 // Consult the live-words bitmap to copy all contiguously live words at a
2446 // time. These words may constitute multiple objects. To avoid the need for
2447 // consulting the mark-bitmap to find where the next live object starts, we
2448 // use the object-size returned by VisitRefsForCompaction.
2449 //
2450 // We do the compaction in reverse direction so that the pages containing
2451 // TLAB and latest allocations are processed first.
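// The first loop below slides the black-allocated pages (allocations made after the
// marking pause) using SlideBlackPage(); the second loop compacts the remaining pages
// with CompactPage(). FreeFromSpacePages() is called along the way to reclaim
// from-space pages that are no longer needed.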
2452 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2453 size_t page_status_arr_len = moving_first_objs_count_ + black_page_count_;
2454 size_t idx = page_status_arr_len;
2455 uint8_t* to_space_end = bump_pointer_space_->Begin() + page_status_arr_len * gPageSize;
2456 uint8_t* shadow_space_end = nullptr;
2457 if (kMode == kMinorFaultMode) {
2458 shadow_space_end = shadow_to_space_map_.Begin() + page_status_arr_len * gPageSize;
2459 }
2460 uint8_t* pre_compact_page = black_allocations_begin_ + (black_page_count_ * gPageSize);
2461
2462 DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
2463
2464 UpdateClassAfterObjMap();
2465 // These variables are maintained by FreeFromSpacePages().
2466 last_reclaimed_page_ = pre_compact_page;
2467 last_reclaimable_page_ = last_reclaimed_page_;
2468 cur_reclaimable_page_ = last_reclaimed_page_;
2469 last_checked_reclaim_page_idx_ = idx;
2470 class_after_obj_iter_ = class_after_obj_ordered_map_.rbegin();
2471 // Allocated-black pages
2472 mirror::Object* next_page_first_obj = nullptr;
2473 while (idx > moving_first_objs_count_) {
2474 idx--;
2475 pre_compact_page -= gPageSize;
2476 to_space_end -= gPageSize;
2477 if (kMode == kMinorFaultMode) {
2478 shadow_space_end -= gPageSize;
2479 page = shadow_space_end;
2480 } else if (kMode == kFallbackMode) {
2481 page = to_space_end;
2482 }
2483 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2484 uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[idx];
2485 if (first_obj != nullptr) {
2486 DoPageCompactionWithStateChange<kMode>(idx,
2487 to_space_end,
2488 page,
2489 /*map_immediately=*/true,
2490 [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2491 SlideBlackPage(first_obj,
2492 next_page_first_obj,
2493 first_chunk_size,
2494 pre_compact_page,
2495 page,
2496 kMode == kCopyMode);
2497 });
2498 // We are sliding here, so no point attempting to madvise for every
2499 // page. Wait for enough pages to be done.
2500 if (idx % DivideByPageSize(kMinFromSpaceMadviseSize) == 0) {
2501 FreeFromSpacePages(idx, kMode, /*end_idx_for_mapping=*/0);
2502 }
2503 }
2504 next_page_first_obj = first_obj;
2505 }
2506 DCHECK_EQ(pre_compact_page, black_allocations_begin_);
2507 // Reserved page to be used if we can't find any reclaimable page for processing.
2508 uint8_t* reserve_page = page;
2509 size_t end_idx_for_mapping = idx;
2510 while (idx > 0) {
2511 idx--;
2512 to_space_end -= gPageSize;
2513 if (kMode == kMinorFaultMode) {
2514 shadow_space_end -= gPageSize;
2515 page = shadow_space_end;
2516 } else if (kMode == kFallbackMode) {
2517 page = to_space_end;
2518 } else {
2519 DCHECK_EQ(kMode, kCopyMode);
2520 if (cur_reclaimable_page_ > last_reclaimable_page_) {
2521 cur_reclaimable_page_ -= gPageSize;
2522 page = cur_reclaimable_page_ + from_space_slide_diff_;
2523 } else {
2524 page = reserve_page;
2525 }
2526 }
2527 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2528 bool success = DoPageCompactionWithStateChange<kMode>(
2529 idx,
2530 to_space_end,
2531 page,
2532 /*map_immediately=*/page == reserve_page,
2533 [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2534 CompactPage(first_obj, pre_compact_offset_moving_space_[idx], page, kMode == kCopyMode);
2535 });
2536 if (kMode == kCopyMode && (!success || page == reserve_page) && end_idx_for_mapping - idx > 1) {
2537 // Map the pages at the following addresses, as they can't be mapped together
2538 // with the yet-to-be-compacted pages: their src-side pages won't be contiguous.
2539 MapMovingSpacePages(
2540 idx + 1, end_idx_for_mapping, /*from_fault=*/false, /*return_on_contention=*/true);
2541 }
2542 if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
2543 end_idx_for_mapping = idx;
2544 }
2545 }
2546 // Map one last time to finish anything left over.
2547 if (kMode == kCopyMode && end_idx_for_mapping > 0) {
2548 MapMovingSpacePages(
2549 idx, end_idx_for_mapping, /*from_fault=*/false, /*return_on_contention=*/false);
2550 }
2551 DCHECK_EQ(to_space_end, bump_pointer_space_->Begin());
2552 }
2553
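// A sketch of the page-status word layout that MapMovingSpacePages() below
// relies on (for exposition only; kPageStateMask and PageState are the real
// names, the decode helpers here are hypothetical):
//
//   // status word: | from-space offset of compacted contents | PageState  |
//   //              |             (upper bits)                | (low bits) |
//   PageState DecodeState(uint32_t word) {
//     return static_cast<PageState>(word & kPageStateMask);
//   }
//   uint32_t DecodeFromSpaceOffset(uint32_t word) { return word & ~kPageStateMask; }
//
// Consecutive kProcessed entries whose from-space offsets are also consecutive
// can be mapped with a single COPY ioctl, which is what the loop in the
// function looks for.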
2554 size_t MarkCompact::MapMovingSpacePages(size_t start_idx,
2555 size_t arr_len,
2556 bool from_fault,
2557 bool return_on_contention) {
2558 DCHECK_LT(start_idx, arr_len);
2559 size_t arr_idx = start_idx;
2560 bool wait_for_unmapped = false;
2561 while (arr_idx < arr_len) {
2562 size_t map_count = 0;
2563 uint32_t cur_state = moving_pages_status_[arr_idx].load(std::memory_order_acquire);
2564 // Find a contiguous range that can be mapped with single ioctl.
2565 for (size_t i = arr_idx; i < arr_len; i++, map_count++) {
2566 uint32_t s = moving_pages_status_[i].load(std::memory_order_acquire);
2567 if (GetPageStateFromWord(s) != PageState::kProcessed) {
2568 break;
2569 }
2570 DCHECK_EQ((cur_state & ~kPageStateMask) + (i - arr_idx) * gPageSize, s & ~kPageStateMask);
2571 }
2572
2573 if (map_count == 0) {
2574 if (from_fault) {
2575 bool mapped = GetPageStateFromWord(cur_state) == PageState::kProcessedAndMapped;
2576 return mapped ? 1 : 0;
2577 }
2578 // Skip the pages that this thread cannot map.
2579 for (; arr_idx < arr_len; arr_idx++) {
2580 PageState s = GetMovingPageState(arr_idx);
2581 if (s == PageState::kProcessed) {
2582 break;
2583 } else if (s != PageState::kProcessedAndMapped) {
2584 wait_for_unmapped = true;
2585 }
2586 }
2587 } else {
2588 uint32_t from_space_offset = cur_state & ~kPageStateMask;
2589 uint8_t* to_space_start = moving_space_begin_ + arr_idx * gPageSize;
2590 uint8_t* from_space_start = from_space_begin_ + from_space_offset;
2591 DCHECK_ALIGNED_PARAM(to_space_start, gPageSize);
2592 DCHECK_ALIGNED_PARAM(from_space_start, gPageSize);
2593 size_t mapped_len =
2594 CopyIoctl(to_space_start, from_space_start, map_count * gPageSize, return_on_contention);
2595 for (size_t l = 0; l < mapped_len; l += gPageSize, arr_idx++) {
2596 // Store is sufficient as anyone storing is doing it with the same value.
2597 moving_pages_status_[arr_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2598 std::memory_order_release);
2599 }
2600 if (from_fault) {
2601 return DivideByPageSize(mapped_len);
2602 }
2603 // The COPY ioctl can also return a smaller length if a page was found to be
2604 // already mapped. But that doesn't count as contention.
2605 if (return_on_contention && DivideByPageSize(mapped_len) < map_count && errno != EEXIST) {
2606 return arr_idx - start_idx;
2607 }
2608 }
2609 }
2610 if (wait_for_unmapped) {
2611 for (size_t i = start_idx; i < arr_len; i++) {
2612 PageState s = GetMovingPageState(i);
2613 DCHECK_GT(s, PageState::kProcessed);
2614 uint32_t backoff_count = 0;
2615 while (s != PageState::kProcessedAndMapped) {
2616 BackOff(backoff_count++);
2617 s = GetMovingPageState(i);
2618 }
2619 }
2620 }
2621 return arr_len - start_idx;
2622 }
2623
2624 void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) {
2625 DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + gPageSize);
2626 // For every object found in the page, visit the previous object. This ensures
2627 // that we can visit an object without checking the page-end boundary.
2628 // Call VisitRefsForCompaction with from-space read-barrier as the klass object and
2629 // super-class loads require it.
2630 // TODO: Set kVisitNativeRoots to false once we implement concurrent
2631 // compaction
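// For example, if objects A|B|C are marked on this page (with A spilling in
// from the preceding page), the bitmap callback below updates A when it
// reports B and updates B when it reports C; the trailing code after
// VisitMarkedRange() then updates C, clipped to the page end.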
2632 mirror::Object* curr_obj = first;
2633 non_moving_space_bitmap_->VisitMarkedRange(
2634 reinterpret_cast<uintptr_t>(first) + mirror::kObjectHeaderSize,
2635 reinterpret_cast<uintptr_t>(page + gPageSize),
2636 [&](mirror::Object* next_obj) {
2637 // TODO: Once non-moving space update becomes concurrent, we'll
2638 // require fetching the from-space address of 'curr_obj' and then call
2639 // visitor on that.
2640 if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2641 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false>
2642 visitor(this, curr_obj, page, page + gPageSize);
2643 MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj));
2644 // Native roots shouldn't be visited as they are done when this
2645 // object's beginning was visited in the preceding page.
2646 curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(
2647 visitor, begin_offset, MemberOffset(-1));
2648 } else {
2649 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
2650 visitor(this, curr_obj, page, page + gPageSize);
2651 curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
2652 MemberOffset(0),
2653 MemberOffset(-1));
2654 }
2655 curr_obj = next_obj;
2656 });
2657
2658 MemberOffset end_offset(page + gPageSize - reinterpret_cast<uint8_t*>(curr_obj));
2659 if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2660 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true>
2661 visitor(this, curr_obj, page, page + gPageSize);
2662 curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(
2663 visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset);
2664 } else {
2665 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
2666 visitor(this, curr_obj, page, page + gPageSize);
2667 curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor, MemberOffset(0), end_offset);
2668 }
2669 }
2670
2671 void MarkCompact::UpdateNonMovingSpace() {
2672 TimingLogger::ScopedTiming t("(Paused)UpdateNonMovingSpace", GetTimings());
2673 // Iterating in reverse ensures that the class pointer in objects which span
2674 // across more than one page gets updated in the end. This is necessary for
2675 // VisitRefsForCompaction() to work correctly.
2676 // TODO: If and when we make non-moving space update concurrent, implement a
2677 // mechanism to remember class pointers for such objects off-heap and pass it
2678 // to VisitRefsForCompaction().
2679 uint8_t* page = non_moving_space_->Begin() + non_moving_first_objs_count_ * gPageSize;
2680 for (ssize_t i = non_moving_first_objs_count_ - 1; i >= 0; i--) {
2681 mirror::Object* obj = first_objs_non_moving_space_[i].AsMirrorPtr();
2682 page -= gPageSize;
2683 // null means there are no objects on the page to update references.
2684 if (obj != nullptr) {
2685 UpdateNonMovingPage(obj, page);
2686 }
2687 }
2688 }
2689
2690 void MarkCompact::UpdateMovingSpaceBlackAllocations() {
2691 // For sliding black pages, we need the first-object, which overlaps with the
2692 // first byte of the page. Additionally, we compute the size of first chunk of
2693 // black objects. This will suffice for most black pages. Unlike compaction
2694 // pages, here we don't need to pre-compute the offset within first-obj from
2695 // where sliding has to start. That can be calculated using the pre-compact
2696 // address of the page. Therefore, to save space, we store the first chunk's
2697 // size in black_alloc_pages_first_chunk_size_ array.
2698 // For the pages which may have holes after the first chunk, which could happen
2699 // if a new TLAB starts in the middle of the page, we mark the objects in
2700 // the mark-bitmap. So, if the first-chunk size is smaller than gPageSize,
2701 // then we use the mark-bitmap for the remainder of the page.
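// Worked example (for exposition only, assuming 4 KiB pages): if a black page
// begins with 1 KiB of back-to-back objects from one TLAB and a new TLAB then
// starts mid-page, black_alloc_pages_first_chunk_size_[idx] records 1 KiB and
// first_objs_moving_space_[idx] records the object overlapping the page's
// first byte; the objects after that hole are recovered from the mark-bitmap,
// whose bits are set below only when a TLAB starts in the middle of a page.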
2702 uint8_t* const begin = bump_pointer_space_->Begin();
2703 uint8_t* black_allocs = black_allocations_begin_;
2704 DCHECK_LE(begin, black_allocs);
2705 size_t consumed_blocks_count = 0;
2706 size_t first_block_size;
2707 // Needed only for a debug check at the end of the function. Hopefully the
2708 // compiler will eliminate it otherwise.
2709 size_t num_blocks = 0;
2710 // Get the list of all blocks allocated in the bump-pointer space.
2711 std::vector<size_t>* block_sizes = bump_pointer_space_->GetBlockSizes(thread_running_gc_,
2712 &first_block_size);
2713 DCHECK_LE(first_block_size, (size_t)(black_allocs - begin));
2714 if (block_sizes != nullptr) {
2715 size_t black_page_idx = moving_first_objs_count_;
2716 uint8_t* block_end = begin + first_block_size;
2717 uint32_t remaining_chunk_size = 0;
2718 uint32_t first_chunk_size = 0;
2719 mirror::Object* first_obj = nullptr;
2720 num_blocks = block_sizes->size();
2721 for (size_t block_size : *block_sizes) {
2722 block_end += block_size;
2723 // Skip the blocks that are prior to the black allocations. These will be
2724 // merged with the main-block later.
2725 if (black_allocs >= block_end) {
2726 consumed_blocks_count++;
2727 continue;
2728 }
2729 mirror::Object* obj = reinterpret_cast<mirror::Object*>(black_allocs);
2730 bool set_mark_bit = remaining_chunk_size > 0;
2731 // We don't know how many objects are allocated in the current block. When we hit
2732 // a null, assume it's the end. This works as every block is expected to
2733 // have objects allocated linearly using bump-pointer.
2734 // BumpPointerSpace::Walk() also works similarly.
2735 while (black_allocs < block_end
2736 && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
2737 // Try to keep instructions which access class instance together to
2738 // avoid reloading the pointer from object.
2739 size_t obj_size = obj->SizeOf();
2740 bytes_scanned_ += obj_size;
2741 obj_size = RoundUp(obj_size, kAlignment);
2742 UpdateClassAfterObjectMap(obj);
2743 if (first_obj == nullptr) {
2744 first_obj = obj;
2745 }
2746 // We only need the mark-bitmap in the pages wherein a new TLAB starts in
2747 // the middle of the page.
2748 if (set_mark_bit) {
2749 moving_space_bitmap_->Set(obj);
2750 }
2751 // Handle objects which cross page boundary, including objects larger
2752 // than page size.
2753 if (remaining_chunk_size + obj_size >= gPageSize) {
2754 set_mark_bit = false;
2755 first_chunk_size += gPageSize - remaining_chunk_size;
2756 remaining_chunk_size += obj_size;
2757 // We should not store first-object and remaining_chunk_size if there were
2758 // unused bytes before this TLAB, in which case we must have already
2759 // stored the values (below).
2760 if (black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2761 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2762 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2763 }
2764 black_page_idx++;
2765 remaining_chunk_size -= gPageSize;
2766 // Consume an object larger than page size.
2767 while (remaining_chunk_size >= gPageSize) {
2768 black_alloc_pages_first_chunk_size_[black_page_idx] = gPageSize;
2769 first_objs_moving_space_[black_page_idx].Assign(obj);
2770 black_page_idx++;
2771 remaining_chunk_size -= gPageSize;
2772 }
2773 first_obj = remaining_chunk_size > 0 ? obj : nullptr;
2774 first_chunk_size = remaining_chunk_size;
2775 } else {
2776 DCHECK_LE(first_chunk_size, remaining_chunk_size);
2777 first_chunk_size += obj_size;
2778 remaining_chunk_size += obj_size;
2779 }
2780 black_allocs += obj_size;
2781 obj = reinterpret_cast<mirror::Object*>(black_allocs);
2782 }
2783 DCHECK_LE(black_allocs, block_end);
2784 DCHECK_LT(remaining_chunk_size, gPageSize);
2785 // consume the unallocated portion of the block
2786 if (black_allocs < block_end) {
2787 // first-chunk of the current page ends here. Store it.
2788 if (first_chunk_size > 0 && black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2789 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2790 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2791 }
2792 first_chunk_size = 0;
2793 first_obj = nullptr;
2794 size_t page_remaining = gPageSize - remaining_chunk_size;
2795 size_t block_remaining = block_end - black_allocs;
2796 if (page_remaining <= block_remaining) {
2797 block_remaining -= page_remaining;
2798 // current page and the subsequent empty pages in the block
2799 black_page_idx += 1 + DivideByPageSize(block_remaining);
2800 remaining_chunk_size = ModuloPageSize(block_remaining);
2801 } else {
2802 remaining_chunk_size += block_remaining;
2803 }
2804 black_allocs = block_end;
2805 }
2806 }
2807 if (black_page_idx < DivideByPageSize(bump_pointer_space_->Size())) {
2808 // Store the leftover first-chunk, if any, and update page index.
2809 if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) {
2810 black_page_idx++;
2811 } else if (first_chunk_size > 0) {
2812 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2813 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2814 black_page_idx++;
2815 }
2816 }
2817 black_page_count_ = black_page_idx - moving_first_objs_count_;
2818 delete block_sizes;
2819 }
2820 // Update bump-pointer space by consuming all the pre-black blocks into the
2821 // main one.
2822 bump_pointer_space_->SetBlockSizes(thread_running_gc_,
2823 post_compact_end_ - begin,
2824 consumed_blocks_count);
2825 if (kIsDebugBuild) {
2826 size_t moving_space_size = bump_pointer_space_->Size();
2827 size_t los_size = 0;
2828 if (heap_->GetLargeObjectsSpace()) {
2829 los_size = heap_->GetLargeObjectsSpace()->GetBytesAllocated();
2830 }
2831 // The moving-space size is already updated to post-compact size in SetBlockSizes above.
2832 // Also, bytes-allocated has already been adjusted with the large-object space's freed-bytes
2833 // in Sweep(), but not with moving-space freed-bytes.
2834 CHECK_GE(heap_->GetBytesAllocated() - black_objs_slide_diff_, moving_space_size + los_size)
2835 << " moving-space size:" << moving_space_size
2836 << " moving-space bytes-freed:" << black_objs_slide_diff_
2837 << " large-object-space size:" << los_size
2838 << " large-object-space bytes-freed:" << GetCurrentIteration()->GetFreedLargeObjectBytes()
2839 << " num-tlabs-merged:" << consumed_blocks_count
2840 << " main-block-size:" << (post_compact_end_ - begin)
2841 << " total-tlabs-moving-space:" << num_blocks;
2842 }
2843 }
2844
2845 void MarkCompact::UpdateNonMovingSpaceBlackAllocations() {
2846 accounting::ObjectStack* stack = heap_->GetAllocationStack();
2847 const StackReference<mirror::Object>* limit = stack->End();
2848 uint8_t* const space_begin = non_moving_space_->Begin();
2849 for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
2850 mirror::Object* obj = it->AsMirrorPtr();
2851 if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
2852 non_moving_space_bitmap_->Set(obj);
2853 // Clear so that we don't try to set the bit again in the next GC-cycle.
2854 it->Clear();
2855 size_t idx = DivideByPageSize(reinterpret_cast<uint8_t*>(obj) - space_begin);
2856 uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), gPageSize);
2857 mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
2858 if (first_obj == nullptr
2859 || (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) {
2860 first_objs_non_moving_space_[idx].Assign(obj);
2861 }
2862 mirror::Object* next_page_first_obj = first_objs_non_moving_space_[++idx].AsMirrorPtr();
2863 uint8_t* next_page_begin = page_begin + gPageSize;
2864 if (next_page_first_obj == nullptr
2865 || reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) {
2866 size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
2867 uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + obj_size;
2868 while (next_page_begin < obj_end) {
2869 first_objs_non_moving_space_[idx++].Assign(obj);
2870 next_page_begin += gPageSize;
2871 }
2872 }
2873 // update first_objs count in case we went past non_moving_first_objs_count_
2874 non_moving_first_objs_count_ = std::max(non_moving_first_objs_count_, idx);
2875 }
2876 }
2877 }
2878
2879 class MarkCompact::ImmuneSpaceUpdateObjVisitor {
2880 public:
2881 explicit ImmuneSpaceUpdateObjVisitor(MarkCompact* collector) : collector_(collector) {}
2882
2883 void operator()(mirror::Object* obj) const ALWAYS_INLINE REQUIRES(Locks::mutator_lock_) {
2884 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_,
2885 obj,
2886 /*begin_*/nullptr,
2887 /*end_*/nullptr);
2888 obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2889 visitor, MemberOffset(0), MemberOffset(-1));
2890 }
2891
2892 static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) {
2893 reinterpret_cast<ImmuneSpaceUpdateObjVisitor*>(arg)->operator()(obj);
2894 }
2895
2896 private:
2897 MarkCompact* const collector_;
2898 };
2899
2900 class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor {
2901 public:
2902 explicit ClassLoaderRootsUpdater(MarkCompact* collector)
2903 : collector_(collector),
2904 moving_space_begin_(collector->moving_space_begin_),
2905 moving_space_end_(collector->moving_space_end_) {}
2906
2907 void Visit(ObjPtr<mirror::ClassLoader> class_loader) override
2908 REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) {
2909 ClassTable* const class_table = class_loader->GetClassTable();
2910 if (class_table != nullptr) {
2911 // Classes are updated concurrently.
2912 class_table->VisitRoots(*this, /*skip_classes=*/true);
2913 }
2914 }
2915
2916 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
2917 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2918 if (!root->IsNull()) {
2919 VisitRoot(root);
2920 }
2921 }
2922
2923 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
2924 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2925 collector_->UpdateRoot(
2926 root, moving_space_begin_, moving_space_end_, RootInfo(RootType::kRootVMInternal));
2927 }
2928
2929 private:
2930 MarkCompact* collector_;
2931 uint8_t* const moving_space_begin_;
2932 uint8_t* const moving_space_end_;
2933 };
2934
2935 class MarkCompact::LinearAllocPageUpdater {
2936 public:
2937 explicit LinearAllocPageUpdater(MarkCompact* collector)
2938 : collector_(collector),
2939 moving_space_begin_(collector->moving_space_begin_),
2940 moving_space_end_(collector->moving_space_end_),
2941 last_page_touched_(false) {}
2942
2943 // Update a page in multi-object arena.
2944 void MultiObjectArena(uint8_t* page_begin, uint8_t* first_obj)
2945 REQUIRES_SHARED(Locks::mutator_lock_) {
2946 DCHECK(first_obj != nullptr);
2947 DCHECK_ALIGNED_PARAM(page_begin, gPageSize);
2948 uint8_t* page_end = page_begin + gPageSize;
2949 uint32_t obj_size;
2950 for (uint8_t* byte = first_obj; byte < page_end;) {
2951 TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte);
2952 obj_size = header->GetSize();
2953 if (UNLIKELY(obj_size == 0)) {
2954 // No more objects in this page to visit.
2955 last_page_touched_ = byte >= page_begin;
2956 return;
2957 }
2958 uint8_t* obj = byte + sizeof(TrackingHeader);
2959 uint8_t* obj_end = byte + obj_size;
2960 if (header->Is16Aligned()) {
2961 obj = AlignUp(obj, 16);
2962 }
2963 uint8_t* begin_boundary = std::max(obj, page_begin);
2964 uint8_t* end_boundary = std::min(obj_end, page_end);
2965 if (begin_boundary < end_boundary) {
2966 VisitObject(header->GetKind(), obj, begin_boundary, end_boundary);
2967 }
2968 if (ArenaAllocator::IsRunningOnMemoryTool()) {
2969 obj_size += ArenaAllocator::kMemoryToolRedZoneBytes;
2970 }
2971 byte += RoundUp(obj_size, LinearAlloc::kAlignment);
2972 }
2973 last_page_touched_ = true;
2974 }
2975
2976 // This version is only used for cases where the entire page is filled with
2977 // GC-roots. For example, class-table and intern-table.
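// For example, a class-table slot stores a compressed reference with tag bits
// in its low (object-alignment) bits; the loop below strips the tag
// (word & ~kMask), lets VisitRootIfNonNull() relocate the reference if it
// moved, and then ORs the tag back so the table's metadata is preserved.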
2978 void SingleObjectArena(uint8_t* page_begin, size_t page_size)
2979 REQUIRES_SHARED(Locks::mutator_lock_) {
2980 static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>));
2981 DCHECK_ALIGNED(page_begin, kAlignment);
2982 // Least significant bits are used by class-table.
2983 static constexpr uint32_t kMask = kObjectAlignment - 1;
2984 size_t num_roots = page_size / sizeof(GcRoot<mirror::Object>);
2985 uint32_t* root_ptr = reinterpret_cast<uint32_t*>(page_begin);
2986 for (size_t i = 0; i < num_roots; root_ptr++, i++) {
2987 uint32_t word = *root_ptr;
2988 if (word != 0) {
2989 uint32_t lsbs = word & kMask;
2990 word &= ~kMask;
2991 VisitRootIfNonNull(reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(&word));
2992 *root_ptr = word | lsbs;
2993 last_page_touched_ = true;
2994 }
2995 }
2996 }
2997
2998 bool WasLastPageTouched() const { return last_page_touched_; }
2999
3000 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
3001 ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
3002 if (!root->IsNull()) {
3003 VisitRoot(root);
3004 }
3005 }
3006
3007 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
3008 ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
3009 mirror::Object* old_ref = root->AsMirrorPtr();
3010 DCHECK_NE(old_ref, nullptr);
3011 if (MarkCompact::HasAddress(old_ref, moving_space_begin_, moving_space_end_)) {
3012 mirror::Object* new_ref = old_ref;
3013 if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) {
3014 new_ref = collector_->PostCompactBlackObjAddr(old_ref);
3015 } else if (collector_->live_words_bitmap_->Test(old_ref)) {
3016 DCHECK(collector_->moving_space_bitmap_->Test(old_ref))
3017 << "ref:" << old_ref << " root:" << root;
3018 new_ref = collector_->PostCompactOldObjAddr(old_ref);
3019 }
3020 if (old_ref != new_ref) {
3021 root->Assign(new_ref);
3022 }
3023 }
3024 }
3025
3026 private:
3027 void VisitObject(LinearAllocKind kind,
3028 void* obj,
3029 uint8_t* start_boundary,
3030 uint8_t* end_boundary) const ALWAYS_INLINE
3031 REQUIRES_SHARED(Locks::mutator_lock_) {
3032 switch (kind) {
3033 case LinearAllocKind::kNoGCRoots:
3034 break;
3035 case LinearAllocKind::kGCRootArray:
3036 {
3037 GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary);
3038 GcRoot<mirror::Object>* last = reinterpret_cast<GcRoot<mirror::Object>*>(end_boundary);
3039 for (; root < last; root++) {
3040 VisitRootIfNonNull(root->AddressWithoutBarrier());
3041 }
3042 }
3043 break;
3044 case LinearAllocKind::kArtMethodArray:
3045 {
3046 LengthPrefixedArray<ArtMethod>* array = static_cast<LengthPrefixedArray<ArtMethod>*>(obj);
3047 // Old methods are clobbered in debug builds. Check size to confirm if the array
3048 // has any GC roots to visit. See ClassLinker::LinkMethodsHelper::ClobberOldMethods()
3049 if (array->size() > 0) {
3050 if (collector_->pointer_size_ == PointerSize::k64) {
3051 ArtMethod::VisitArrayRoots<PointerSize::k64>(
3052 *this, start_boundary, end_boundary, array);
3053 } else {
3054 DCHECK_EQ(collector_->pointer_size_, PointerSize::k32);
3055 ArtMethod::VisitArrayRoots<PointerSize::k32>(
3056 *this, start_boundary, end_boundary, array);
3057 }
3058 }
3059 }
3060 break;
3061 case LinearAllocKind::kArtMethod:
3062 ArtMethod::VisitRoots(*this, start_boundary, end_boundary, static_cast<ArtMethod*>(obj));
3063 break;
3064 case LinearAllocKind::kArtFieldArray:
3065 ArtField::VisitArrayRoots(*this,
3066 start_boundary,
3067 end_boundary,
3068 static_cast<LengthPrefixedArray<ArtField>*>(obj));
3069 break;
3070 case LinearAllocKind::kDexCacheArray:
3071 {
3072 mirror::DexCachePair<mirror::Object>* first =
3073 reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(start_boundary);
3074 mirror::DexCachePair<mirror::Object>* last =
3075 reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary);
3076 mirror::DexCache::VisitDexCachePairRoots(*this, first, last);
3077 }
3078 }
3079 }
3080
3081 MarkCompact* const collector_;
3082 // Cache to speed up checking if GC-root is in moving space or not.
3083 uint8_t* const moving_space_begin_;
3084 uint8_t* const moving_space_end_;
3085 // Whether the last page was touched or not.
3086 bool last_page_touched_ = false;
3087 };
3088
3089 void MarkCompact::UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) {
3090 // If the process is debuggable then redefinition is allowed, which may mean
3091 // pre-zygote-fork class-tables may have pointers to classes in the moving-space.
3092 // So visit classes from class-sets that are not in linear-alloc arena-pool.
3093 if (UNLIKELY(runtime->IsJavaDebuggableAtInit())) {
3094 ClassLinker* linker = runtime->GetClassLinker();
3095 ClassLoaderRootsUpdater updater(this);
3096 GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
3097 auto cond = [this, pool, immune_class_table_only](ClassTable::ClassSet& set) -> bool {
3098 if (!set.empty()) {
3099 return immune_class_table_only ?
3100 immune_spaces_.ContainsObject(reinterpret_cast<mirror::Object*>(&*set.begin())) :
3101 !pool->Contains(reinterpret_cast<void*>(&*set.begin()));
3102 }
3103 return false;
3104 };
3105 linker->VisitClassTables([cond, &updater](ClassTable* table)
3106 REQUIRES_SHARED(Locks::mutator_lock_) {
3107 table->VisitClassesIfConditionMet(cond, updater);
3108 });
3109 ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
3110 linker->GetBootClassTable()->VisitClassesIfConditionMet(cond, updater);
3111 }
3112 }
3113
3114 void MarkCompact::CompactionPause() {
3115 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3116 Runtime* runtime = Runtime::Current();
3117 non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap();
3118 if (kIsDebugBuild) {
3119 DCHECK_EQ(thread_running_gc_, Thread::Current());
3120 stack_low_addr_ = thread_running_gc_->GetStackEnd();
3121 stack_high_addr_ =
3122 reinterpret_cast<char*>(stack_low_addr_) + thread_running_gc_->GetStackSize();
3123 }
3124 {
3125 TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings());
3126 ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
3127 // Refresh data-structures to catch up on allocations that may have
3128 // happened since the marking-phase pause.
3129 // There could be several TLABs that got allocated since marking pause. We
3130 // don't want to compact them and instead update the TLAB info in TLS and
3131 // let mutators continue to use the TLABs.
3132 // We need to set all the bits in live-words bitmap corresponding to allocated
3133 // objects. Also, we need to find the objects that are overlapping with
3134 // page-begin boundaries. Unlike objects allocated before
3135 // black_allocations_begin_, which can be identified via mark-bitmap, we can get
3136 // this info only via walking the space past black_allocations_begin_, which
3137 // involves fetching object size.
3138 // TODO: We can reduce the time spent on this in a pause by performing one
3139 // round of this concurrently prior to the pause.
3140 UpdateMovingSpaceBlackAllocations();
3141 // Iterate over the allocation_stack_, for every object in the non-moving
3142 // space:
3143 // 1. Mark the object in live bitmap
3144 // 2. Erase the object from allocation stack
3145 // 3. In the corresponding page, if the first-object vector needs updating
3146 // then do so.
3147 UpdateNonMovingSpaceBlackAllocations();
3148 // This store is visible to mutator (or uffd worker threads) as the mutator
3149 // lock's unlock guarantees that.
3150 compacting_ = true;
3151 // Start updating roots and system weaks now.
3152 heap_->GetReferenceProcessor()->UpdateRoots(this);
3153 }
3154 {
3155 // TODO: Immune space update has to happen either before or after
3156 // remapping pre-compact pages to from-space. Depending on when it's
3157 // done, we have to invoke VisitRefsForCompaction() with or without
3158 // read-barrier.
3159 TimingLogger::ScopedTiming t2("(Paused)UpdateImmuneSpaces", GetTimings());
3160 accounting::CardTable* const card_table = heap_->GetCardTable();
3161 for (auto& space : immune_spaces_.GetSpaces()) {
3162 DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
3163 accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
3164 accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
3165 // Having zygote-space indicates that the first zygote fork has taken
3166 // place and that the classes/dex-caches in immune-spaces may have allocations
3167 // (ArtMethod/ArtField arrays, dex-cache array, etc.) in the
3168 // non-userfaultfd visited private-anonymous mappings. Visit them here.
3169 ImmuneSpaceUpdateObjVisitor visitor(this);
3170 if (table != nullptr) {
3171 table->ProcessCards();
3172 table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
3173 } else {
3174 WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
3175 card_table->Scan<false>(
3176 live_bitmap,
3177 space->Begin(),
3178 space->Limit(),
3179 visitor,
3180 accounting::CardTable::kCardDirty - 1);
3181 }
3182 }
3183 }
3184
3185 {
3186 TimingLogger::ScopedTiming t2("(Paused)UpdateRoots", GetTimings());
3187 runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
3188 runtime->VisitNonThreadRoots(this);
3189 {
3190 ClassLinker* linker = runtime->GetClassLinker();
3191 ClassLoaderRootsUpdater updater(this);
3192 ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
3193 linker->VisitClassLoaders(&updater);
3194 linker->GetBootClassTable()->VisitRoots(updater, /*skip_classes=*/true);
3195 }
3196 SweepSystemWeaks(thread_running_gc_, runtime, /*paused=*/true);
3197
3198 bool has_zygote_space = heap_->HasZygoteSpace();
3199 GcVisitedArenaPool* arena_pool =
3200 static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
3201 // Update immune/pre-zygote class-tables in case class redefinition took
3202 // place. pre-zygote class-tables that are not in immune spaces are updated
3203 // below if we are in fallback-mode or if there is no zygote space. So in
3204 // that case only visit class-tables that are in immune-spaces.
3205 UpdateClassTableClasses(runtime, uffd_ == kFallbackMode || !has_zygote_space);
3206
3207 // Acquire arena-pool's lock, which should be released after the pool is
3208 // userfaultfd registered. This is to ensure that no new arenas are
3209 // allocated and used in between. Since they will not be captured in
3210 // linear_alloc_arenas_ below, we will miss updating their pages. The same
3211 // reason also applies to new allocations within the existing arena which
3212 // may change last_byte.
3213 // Since we are in an STW pause, this shouldn't happen anyway, but holding
3214 // the lock confirms it.
3215 // TODO (b/305779657): Replace with ExclusiveTryLock() and assert that it
3216 // doesn't fail once it is available for ReaderWriterMutex.
3217 WriterMutexLock pool_wmu(thread_running_gc_, arena_pool->GetLock());
3218
3219 // TODO: Find out why it's not sufficient to visit native roots of immune
3220 // spaces, and why all the pre-zygote fork arenas have to be linearly updated.
3221 // Is it possible that some native root starts getting pointed to by some object
3222 // in moving space after fork? Or are we missing a write-barrier somewhere
3223 // when a native root is updated?
3224 auto arena_visitor = [this](uint8_t* page_begin, uint8_t* first_obj, size_t page_size)
3225 REQUIRES_SHARED(Locks::mutator_lock_) {
3226 LinearAllocPageUpdater updater(this);
3227 if (first_obj != nullptr) {
3228 updater.MultiObjectArena(page_begin, first_obj);
3229 } else {
3230 updater.SingleObjectArena(page_begin, page_size);
3231 }
3232 };
3233 if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
3234 // Besides fallback-mode, visit linear-alloc space in the pause for zygote
3235 // processes prior to first fork (that's when zygote space gets created).
3236 if (kIsDebugBuild && IsValidFd(uffd_)) {
3237 // All arenas allocated so far are expected to be pre-zygote fork.
3238 arena_pool->ForEachAllocatedArena(
3239 [](const TrackedArena& arena)
3240 REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
3241 }
3242 arena_pool->VisitRoots(arena_visitor);
3243 } else {
3244 // Inform the arena-pool that compaction is going on. So the TrackedArena
3245 // objects corresponding to the arenas that are freed shouldn't be deleted
3246 // immediately. We will do that in FinishPhase(). This is to avoid ABA
3247 // problem.
3248 arena_pool->DeferArenaFreeing();
3249 arena_pool->ForEachAllocatedArena(
3250 [this, arena_visitor, has_zygote_space](const TrackedArena& arena)
3251 REQUIRES_SHARED(Locks::mutator_lock_) {
3252 // The pre-zygote fork arenas are not visited concurrently in the
3253 // zygote children processes. The native roots of the dirty objects
3254 // are visited during immune space visit below.
3255 if (!arena.IsPreZygoteForkArena()) {
3256 uint8_t* last_byte = arena.GetLastUsedByte();
3257 auto ret = linear_alloc_arenas_.insert({&arena, last_byte});
3258 CHECK(ret.second);
3259 } else if (!arena.IsSingleObjectArena() || !has_zygote_space) {
3260 // Pre-zygote class-table and intern-table don't need to be updated.
3261 // TODO: Explore the possibility of using /proc/self/pagemap to
3262 // fetch which pages in these arenas are private-dirty and then only
3263 // visit those pages. To optimize it further, we can keep all
3264 // pre-zygote arenas in a single memory range so that just one read
3265 // from pagemap is sufficient.
3266 arena.VisitRoots(arena_visitor);
3267 }
3268 });
3269 }
3270 if (use_uffd_sigbus_) {
3271 // Release order wrt to mutator threads' SIGBUS handler load.
3272 sigbus_in_progress_count_.store(0, std::memory_order_release);
3273 }
3274 KernelPreparation();
3275 }
3276
3277 UpdateNonMovingSpace();
3278 // fallback mode
3279 if (uffd_ == kFallbackMode) {
3280 CompactMovingSpace<kFallbackMode>(nullptr);
3281
3282 int32_t freed_bytes = black_objs_slide_diff_;
3283 bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
3284 RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
3285 } else {
3286 DCHECK_EQ(compaction_in_progress_count_.load(std::memory_order_relaxed), 0u);
3287 DCHECK_EQ(compaction_buffer_counter_.load(std::memory_order_relaxed), 1);
3288 if (!use_uffd_sigbus_) {
3289 // We must start worker threads before resuming mutators to avoid deadlocks.
3290 heap_->GetThreadPool()->StartWorkers(thread_running_gc_);
3291 }
3292 }
3293 stack_low_addr_ = nullptr;
3294 }
3295
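// Minimal standalone sketch of the mremap(MREMAP_DONTUNMAP) step performed by
// KernelPrepareRangeForUffd() below (exposition only; error handling elided):
//
//   // Move the physical pages of 'to_addr' to 'from_addr'. With
//   // MREMAP_DONTUNMAP, 'to_addr' stays mapped but empty, so every access to
//   // it faults and can be served through userfaultfd.
//   void* ret = mremap(to_addr, map_size, map_size,
//                      MREMAP_MAYMOVE | MREMAP_FIXED | MREMAP_DONTUNMAP, from_addr);
//   CHECK_EQ(ret, static_cast<void*>(from_addr));
//
// When MREMAP_DONTUNMAP is unavailable the source range gets unmapped, which is
// why the function re-creates the moving-space mapping with mmap() in that case.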
3296 void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr,
3297 uint8_t* from_addr,
3298 size_t map_size,
3299 int fd,
3300 uint8_t* shadow_addr) {
3301 int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
3302 if (gHaveMremapDontunmap) {
3303 mremap_flags |= MREMAP_DONTUNMAP;
3304 }
3305
3306 void* ret = mremap(to_addr, map_size, map_size, mremap_flags, from_addr);
3307 CHECK_EQ(ret, static_cast<void*>(from_addr))
3308 << "mremap to move pages failed: " << strerror(errno)
3309 << ". space-addr=" << reinterpret_cast<void*>(to_addr) << " size=" << PrettySize(map_size);
3310
3311 if (shadow_addr != nullptr) {
3312 DCHECK_EQ(fd, kFdUnused);
3313 DCHECK(gHaveMremapDontunmap);
3314 ret = mremap(shadow_addr, map_size, map_size, mremap_flags, to_addr);
3315 CHECK_EQ(ret, static_cast<void*>(to_addr))
3316 << "mremap from shadow to to-space map failed: " << strerror(errno);
3317 } else if (!gHaveMremapDontunmap || fd > kFdUnused) {
3318 // Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap
3319 // the moving space again.
3320 int mmap_flags = MAP_FIXED;
3321 if (fd == kFdUnused) {
3322 // Use MAP_FIXED_NOREPLACE so that if someone else reserves the 'to_addr'
3323 // mapping in the meantime (which can happen when MREMAP_DONTUNMAP isn't
3324 // available), we don't unmap someone else's mapping and thereby cause
3325 // crashes elsewhere.
3326 mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
3327 // On some platforms MAP_ANONYMOUS expects fd to be -1.
3328 fd = -1;
3329 } else if (IsValidFd(fd)) {
3330 mmap_flags |= MAP_SHARED;
3331 } else {
3332 DCHECK_EQ(fd, kFdSharedAnon);
3333 mmap_flags |= MAP_SHARED | MAP_ANONYMOUS;
3334 }
3335 ret = mmap(to_addr, map_size, PROT_READ | PROT_WRITE, mmap_flags, fd, 0);
3336 CHECK_EQ(ret, static_cast<void*>(to_addr))
3337 << "mmap for moving space failed: " << strerror(errno);
3338 }
3339 }
3340
3341 void MarkCompact::KernelPreparation() {
3342 TimingLogger::ScopedTiming t("(Paused)KernelPreparation", GetTimings());
3343 uint8_t* moving_space_begin = bump_pointer_space_->Begin();
3344 size_t moving_space_size = bump_pointer_space_->Capacity();
3345 int mode = kCopyMode;
3346 size_t moving_space_register_sz = (moving_first_objs_count_ + black_page_count_) * gPageSize;
3347 DCHECK_LE(moving_space_register_sz, moving_space_size);
3348 if (minor_fault_initialized_) {
3349 if (shadow_to_space_map_.IsValid()) {
3350 size_t shadow_size = shadow_to_space_map_.Size();
3351 void* addr = shadow_to_space_map_.Begin();
3352 if (shadow_size < moving_space_register_sz) {
3353 addr = mremap(addr,
3354 shadow_size,
3355 moving_space_register_sz,
3356 // Don't allow moving with obj-ptr poisoning as the
3357 // mapping needs to be in <4GB address space.
3358 kObjPtrPoisoning ? 0 : MREMAP_MAYMOVE,
3359 /*new_address=*/nullptr);
3360 if (addr != MAP_FAILED) {
3361 // Succeeded in expanding the mapping. Update the MemMap entry for shadow map.
3362 MemMap temp = MemMap::MapPlaceholder(
3363 "moving-space-shadow", static_cast<uint8_t*>(addr), moving_space_register_sz);
3364 std::swap(shadow_to_space_map_, temp);
3365 }
3366 }
3367 if (addr != MAP_FAILED) {
3368 mode = kMinorFaultMode;
3369 } else {
3370 // We are not going to use the shadow map, so protect it to catch any
3371 // potential bugs.
3372 DCHECK_EQ(mprotect(shadow_to_space_map_.Begin(), shadow_to_space_map_.Size(), PROT_NONE), 0)
3373 << "mprotect failed: " << strerror(errno);
3374 }
3375 }
3376 }
3377
3378 bool map_shared =
3379 minor_fault_initialized_ || (!Runtime::Current()->IsZygote() && uffd_minor_fault_supported_);
3380 uint8_t* shadow_addr = nullptr;
3381 if (moving_to_space_fd_ == kFdUnused && map_shared) {
3382 DCHECK(gHaveMremapDontunmap);
3383 DCHECK(shadow_to_space_map_.IsValid());
3384 DCHECK_EQ(shadow_to_space_map_.Size(), moving_space_size);
3385 shadow_addr = shadow_to_space_map_.Begin();
3386 }
3387
3388 KernelPrepareRangeForUffd(moving_space_begin,
3389 from_space_begin_,
3390 moving_space_size,
3391 moving_to_space_fd_,
3392 shadow_addr);
3393
3394 if (IsValidFd(uffd_)) {
3395 if (moving_space_register_sz > 0) {
3396 // mremap clears 'anon_vma' field of anonymous mappings. If we
3397 // uffd-register only the used portion of the space, then the vma gets
3398 // split (between used and unused portions) and as soon as pages are
3399 // mapped to the vmas, they get different `anon_vma` assigned, which
3400 // ensures that the two vmas cannot merge after we uffd-unregister the
3401 // used portion. OTOH, registering the entire space avoids the split, but
3402 // unnecessarily causes userfaults on allocations.
3403 // By faulting-in a page we force the kernel to allocate 'anon_vma' *before*
3404 // the vma-split in uffd-register. This ensures that when we unregister
3405 // the used portion after compaction, the two split vmas merge. This is
3406 // necessary for the mremap of the next GC cycle to not fail due to having
3407 // more than one vma in the source range.
3408 if (moving_space_register_sz < moving_space_size) {
3409 *const_cast<volatile uint8_t*>(moving_space_begin + moving_space_register_sz) = 0;
3410 }
3411 // Register the moving space with userfaultfd.
3412 RegisterUffd(moving_space_begin, moving_space_register_sz, mode);
3413 }
3414 // Prepare linear-alloc for concurrent compaction.
3415 for (auto& data : linear_alloc_spaces_data_) {
3416 bool mmap_again = map_shared && !data.already_shared_;
3417 DCHECK_EQ(static_cast<ssize_t>(data.shadow_.Size()), data.end_ - data.begin_);
3418 // There could be threads running in suspended mode when the compaction
3419 // pause is being executed. In order to make the userfaultfd setup atomic,
3420 // the registration has to be done *before* moving the pages to shadow map.
3421 if (!mmap_again) {
3422 // See the comment in the constructor as to why it's conditionally done.
3423 RegisterUffd(data.begin_,
3424 data.shadow_.Size(),
3425 minor_fault_initialized_ ? kMinorFaultMode : kCopyMode);
3426 }
3427 KernelPrepareRangeForUffd(data.begin_,
3428 data.shadow_.Begin(),
3429 data.shadow_.Size(),
3430 mmap_again ? kFdSharedAnon : kFdUnused);
3431 if (mmap_again) {
3432 data.already_shared_ = true;
3433 RegisterUffd(data.begin_,
3434 data.shadow_.Size(),
3435 minor_fault_initialized_ ? kMinorFaultMode : kCopyMode);
3436 }
3437 }
3438 }
3439 if (map_shared) {
3440 // Start mapping linear-alloc MAP_SHARED only after the compaction pause of
3441 // the first GC in non-zygote processes. This is the GC which sets up
3442 // mappings for using minor-fault in future. Up to this point we run
3443 // userfaultfd in copy-mode, which requires the mappings (of linear-alloc)
3444 // to be MAP_PRIVATE.
3445 map_linear_alloc_shared_ = true;
3446 }
3447 }
3448
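// For reference, the bare-bones userfaultfd handler loop that
// ConcurrentCompaction() below elaborates on looks roughly like this
// (exposition only; error handling and the page-state machinery are omitted,
// and PrepareContentsFor() is a hypothetical stand-in for compacting a page
// into a buffer):
//
//   struct uffd_msg msg;
//   while (read(uffd, &msg, sizeof(msg)) == static_cast<ssize_t>(sizeof(msg))) {
//     CHECK_EQ(msg.event, UFFD_EVENT_PAGEFAULT);
//     uint8_t* fault_page =
//         AlignDown(reinterpret_cast<uint8_t*>(msg.arg.pagefault.address), gPageSize);
//     struct uffdio_copy copy = {};
//     copy.dst = reinterpret_cast<uintptr_t>(fault_page);
//     copy.src = reinterpret_cast<uintptr_t>(PrepareContentsFor(fault_page));
//     copy.len = gPageSize;
//     ioctl(uffd, UFFDIO_COPY, &copy);  // Maps the page and wakes the faulting thread.
//   }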
3449 template <int kMode>
3450 void MarkCompact::ConcurrentCompaction(uint8_t* buf) {
3451 DCHECK_NE(kMode, kFallbackMode);
3452 DCHECK(kMode != kCopyMode || buf != nullptr);
3453 size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
3454 while (true) {
3455 struct uffd_msg msg;
3456 ssize_t nread = read(uffd_, &msg, sizeof(msg));
3457 CHECK_GT(nread, 0);
3458 CHECK_EQ(msg.event, UFFD_EVENT_PAGEFAULT);
3459 DCHECK_EQ(nread, static_cast<ssize_t>(sizeof(msg)));
3460 uint8_t* fault_addr = reinterpret_cast<uint8_t*>(msg.arg.pagefault.address);
3461 if (fault_addr == conc_compaction_termination_page_) {
3462 // The counter doesn't need to be updated atomically as only one thread
3463 // would wake up against the gc-thread's load of this fault_addr. In fact,
3464 // the other threads would wake up serially because every exiting thread
3465 // will wake up the gc-thread, which would retry the load but would again find the
3466 // page missing. Also, the value will be flushed to caches due to the ioctl
3467 // syscall below.
3468 uint8_t ret = thread_pool_counter_--;
3469 // If 'gKernelHasFaultRetry == true' then only the last thread should map the
3470 // zeropage so that the gc-thread can proceed. Otherwise, each thread does
3471 // it and the gc-thread will repeat this fault until thread_pool_counter == 0.
3472 if (!gKernelHasFaultRetry || ret == 1) {
3473 ZeropageIoctl(fault_addr, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
3474 } else {
3475 struct uffdio_range uffd_range;
3476 uffd_range.start = msg.arg.pagefault.address;
3477 uffd_range.len = gPageSize;
3478 CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &uffd_range), 0)
3479 << "ioctl_userfaultfd: wake failed for concurrent-compaction termination page: "
3480 << strerror(errno);
3481 }
3482 break;
3483 }
3484 uint8_t* fault_page = AlignDown(fault_addr, gPageSize);
3485 if (HasAddress(reinterpret_cast<mirror::Object*>(fault_addr))) {
3486 ConcurrentlyProcessMovingPage<kMode>(fault_page, buf, nr_moving_space_used_pages);
3487 } else if (minor_fault_initialized_) {
3488 ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(
3489 fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
3490 } else {
3491 ConcurrentlyProcessLinearAllocPage<kCopyMode>(
3492 fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
3493 }
3494 }
3495 }
3496
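// The SIGBUS path below coordinates with the GC thread through
// sigbus_in_progress_count_: the low bits count handlers currently working on
// compaction, and kSigbusCounterCompactionDoneMask is set once compaction is
// done, after which no handler may increment the count. A simplified view of
// the mutator side (exposition only):
//
//   SigbusCounterType prev = sigbus_in_progress_count_.load(std::memory_order_relaxed);
//   while ((prev & kSigbusCounterCompactionDoneMask) == 0) {
//     if (sigbus_in_progress_count_.compare_exchange_strong(
//             prev, prev + 1, std::memory_order_acquire)) {
//       // Safe to use compaction data-structures; decrement (release) when done.
//       break;
//     }
//   }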
3497 bool MarkCompact::SigbusHandler(siginfo_t* info) {
3498 class ScopedInProgressCount {
3499 public:
3500 explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3501 // Increment the count only if compaction is not done yet.
3502 SigbusCounterType prev =
3503 collector_->sigbus_in_progress_count_.load(std::memory_order_relaxed);
3504 while ((prev & kSigbusCounterCompactionDoneMask) == 0) {
3505 if (collector_->sigbus_in_progress_count_.compare_exchange_strong(
3506 prev, prev + 1, std::memory_order_acquire)) {
3507 DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1);
3508 compaction_done_ = false;
3509 return;
3510 }
3511 }
3512 compaction_done_ = true;
3513 }
3514
3515 bool IsCompactionDone() const {
3516 return compaction_done_;
3517 }
3518
3519 ~ScopedInProgressCount() {
3520 if (!IsCompactionDone()) {
3521 collector_->sigbus_in_progress_count_.fetch_sub(1, std::memory_order_release);
3522 }
3523 }
3524
3525 private:
3526 MarkCompact* const collector_;
3527 bool compaction_done_;
3528 };
3529
3530 DCHECK(use_uffd_sigbus_);
3531 if (info->si_code != BUS_ADRERR) {
3532 // Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be
3533 // handled here.
3534 return false;
3535 }
3536
3537 ScopedInProgressCount spc(this);
3538 uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), gPageSize);
3539 if (!spc.IsCompactionDone()) {
3540 if (HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) {
3541 Thread* self = Thread::Current();
3542 Locks::mutator_lock_->AssertSharedHeld(self);
3543 size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
3544 if (minor_fault_initialized_) {
3545 ConcurrentlyProcessMovingPage<kMinorFaultMode>(
3546 fault_page, nullptr, nr_moving_space_used_pages);
3547 } else {
3548 ConcurrentlyProcessMovingPage<kCopyMode>(
3549 fault_page, self->GetThreadLocalGcBuffer(), nr_moving_space_used_pages);
3550 }
3551 return true;
3552 } else {
3553 // Find the linear-alloc space containing fault-addr
3554 for (auto& data : linear_alloc_spaces_data_) {
3555 if (data.begin_ <= fault_page && data.end_ > fault_page) {
3556 if (minor_fault_initialized_) {
3557 ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(fault_page, false);
3558 } else {
3559 ConcurrentlyProcessLinearAllocPage<kCopyMode>(fault_page, false);
3560 }
3561 return true;
3562 }
3563 }
3564 // Fault address doesn't belong to either moving-space or linear-alloc.
3565 return false;
3566 }
3567 } else {
3568 // We may spuriously get a SIGBUS fault which was initiated before
3569 // compaction finished but ends up here. In that case, if the fault
3570 // address is valid then consider it handled.
3571 return HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) ||
3572 linear_alloc_spaces_data_.end() !=
3573 std::find_if(linear_alloc_spaces_data_.begin(),
3574 linear_alloc_spaces_data_.end(),
3575 [fault_page](const LinearAllocSpaceData& data) {
3576 return data.begin_ <= fault_page && data.end_ > fault_page;
3577 });
3578 }
3579 }
3580
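// ConcurrentlyProcessMovingPage() below lets a faulting mutator claim a page by
// CAS-ing its status word from kUnprocessed to kMutatorProcessing. A simplified
// sketch of the claim (exposition only):
//
//   uint32_t expected = raw_state;  // Last observed value, state == kUnprocessed.
//   if (moving_pages_status_[page_idx].compare_exchange_strong(
//           expected,
//           static_cast<uint8_t>(PageState::kMutatorProcessing),
//           std::memory_order_acq_rel)) {
//     // Winner: compact or slide the page into a private buffer, UFFDIO_COPY it
//     // in, then publish kProcessedAndMapped.
//   } else {
//     // Loser: another thread owns the page; wait for kProcessedAndMapped.
//   }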
3581 template <int kMode>
3582 void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page,
3583 uint8_t* buf,
3584 size_t nr_moving_space_used_pages) {
3585 // TODO: add a class whose scoped dtor records that a page has already been mapped.
3586 // This helps avoid a zero-page ioctl in the gc-thread before unregistering
3587 // unused space.
3588 class ScopedInProgressCount {
3589 public:
3590 explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3591 collector_->compaction_in_progress_count_.fetch_add(1, std::memory_order_relaxed);
3592 }
3593
3594 ~ScopedInProgressCount() {
3595 collector_->compaction_in_progress_count_.fetch_sub(1, std::memory_order_relaxed);
3596 }
3597
3598 private:
3599 MarkCompact* collector_;
3600 };
3601
3602 Thread* self = Thread::Current();
3603 uint8_t* unused_space_begin =
3604 bump_pointer_space_->Begin() + nr_moving_space_used_pages * gPageSize;
3605 DCHECK(IsAlignedParam(unused_space_begin, gPageSize));
3606 DCHECK(kMode == kCopyMode || fault_page < unused_space_begin);
3607 if (kMode == kCopyMode && fault_page >= unused_space_begin) {
3608 // There is a race which allows more than one thread to install a
3609 // zero-page. But we can tolerate that. So absorb the EEXIST returned by
3610 // the ioctl and move on.
3611 ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true);
3612 return;
3613 }
3614 size_t page_idx = DivideByPageSize(fault_page - bump_pointer_space_->Begin());
3615 DCHECK_LT(page_idx, moving_first_objs_count_ + black_page_count_);
3616 mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr();
3617 if (first_obj == nullptr) {
3618 // Install zero-page in the entire remaining tlab to avoid multiple ioctl invocations.
3619 uint8_t* end = AlignDown(self->GetTlabEnd(), gPageSize);
3620 if (fault_page < self->GetTlabStart() || fault_page >= end) {
3621 end = fault_page + gPageSize;
3622 }
3623 size_t end_idx = page_idx + DivideByPageSize(end - fault_page);
3624 size_t length = 0;
3625 for (size_t idx = page_idx; idx < end_idx; idx++, length += gPageSize) {
3626 uint32_t cur_state = moving_pages_status_[idx].load(std::memory_order_acquire);
3627 if (cur_state != static_cast<uint8_t>(PageState::kUnprocessed)) {
3628 DCHECK_EQ(cur_state, static_cast<uint8_t>(PageState::kProcessedAndMapped));
3629 break;
3630 }
3631 }
3632 if (length > 0) {
3633 length =
3634 ZeropageIoctl(fault_page, length, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true);
3635 for (size_t len = 0, idx = page_idx; len < length; idx++, len += gPageSize) {
3636 moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3637 std::memory_order_release);
3638 }
3639 }
3640 return;
3641 }
3642
3643 uint32_t raw_state = moving_pages_status_[page_idx].load(
3644 use_uffd_sigbus_ ? std::memory_order_acquire : std::memory_order_relaxed);
3645 uint32_t backoff_count = 0;
3646 PageState state;
3647 while (true) {
3648 state = GetPageStateFromWord(raw_state);
3649 if (state == PageState::kProcessing || state == PageState::kMutatorProcessing ||
3650 state == PageState::kProcessingAndMapping || state == PageState::kProcessedAndMapping) {
3651 if (!use_uffd_sigbus_) {
3652 break;
3653 }
3654 // Wait for the page to be mapped (by gc-thread or some mutator) before returning.
3655 // The wait is not expected to be long as the read state indicates that the other
3656 // thread is actively working on the page.
3657 BackOff(backoff_count++);
3658 raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3659 } else if (state == PageState::kProcessedAndMapped) {
3660 // Nothing to do.
3661 break;
3662 } else {
3663 // The increment to the in-progress counter must be done before updating
3664 // the page's state. Otherwise, we will end up leaving a window wherein
3665 // the GC-thread could observe that no worker is working on compaction
3666 // and could end up unregistering the moving space from userfaultfd.
3667 ScopedInProgressCount spc(this);
3668 // Acquire order to ensure we don't start writing to shadow map, which is
3669 // shared, before the CAS is successful. Release order to ensure that the
3670 // increment to moving_compaction_in_progress above is not re-ordered
3671 // after the CAS.
3672 if (state == PageState::kUnprocessed &&
3673 moving_pages_status_[page_idx].compare_exchange_strong(
3674 raw_state,
3675 static_cast<uint8_t>(PageState::kMutatorProcessing),
3676 std::memory_order_acq_rel)) {
3677 if (kMode == kMinorFaultMode) {
3678 DCHECK_EQ(buf, nullptr);
3679 buf = shadow_to_space_map_.Begin() + page_idx * gPageSize;
3680 } else if (UNLIKELY(buf == nullptr)) {
3681 DCHECK_EQ(kMode, kCopyMode);
3682 uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed);
3683 // The buffer-map is one page bigger as the first buffer is used by GC-thread.
3684 CHECK_LE(idx, kMutatorCompactionBufferCount);
3685 buf = compaction_buffers_map_.Begin() + idx * gPageSize;
3686 DCHECK(compaction_buffers_map_.HasAddress(buf));
3687 self->SetThreadLocalGcBuffer(buf);
3688 }
3689
3690 if (fault_page < post_compact_end_) {
3691 // The page has to be compacted.
3692 CompactPage(
3693 first_obj, pre_compact_offset_moving_space_[page_idx], buf, kMode == kCopyMode);
3694 } else {
3695 DCHECK_NE(first_obj, nullptr);
3696 DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u);
3697 uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_);
3698 uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx];
3699 mirror::Object* next_page_first_obj = nullptr;
3700 if (page_idx + 1 < moving_first_objs_count_ + black_page_count_) {
3701 next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr();
3702 }
3703 DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
3704 SlideBlackPage(first_obj,
3705 next_page_first_obj,
3706 first_chunk_size,
3707 pre_compact_page,
3708 buf,
3709 kMode == kCopyMode);
3710 }
3711 // Nobody else would simultaneously modify this page's state so an
3712 // atomic store is sufficient. Use 'release' order to guarantee that
3713 // loads/stores to the page are finished before this store. Since the
3714 // mutator used its own buffer for the processing, there is no reason to
3715 // put its index in the status of the page. Also, the mutator is going
3716 // to immediately map the page, so that info is not needed.
3717 moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapping),
3718 std::memory_order_release);
3719 if (kMode == kCopyMode) {
3720 CopyIoctl(fault_page, buf, gPageSize, /*return_on_contention=*/false);
3721 // Store is sufficient as no other thread modifies the status at this stage.
3722 moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3723 std::memory_order_release);
3724 break;
3725 } else {
3726 // We don't support minor-fault feature anymore.
3727 UNREACHABLE();
3728 }
3729 }
3730 state = GetPageStateFromWord(raw_state);
3731 if (state == PageState::kProcessed) {
3732 size_t arr_len = moving_first_objs_count_ + black_page_count_;
3733 // The page is processed but not mapped. We should map it. The release
3734 // order used in MapMovingSpacePages will ensure that the increment to
3735 // compaction_in_progress_count_ is done first.
3736 if (MapMovingSpacePages(page_idx,
3737 arr_len,
3738 /*from_fault=*/true,
3739 /*return_on_contention=*/false) > 0) {
3740 break;
3741 }
3742 raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3743 }
3744 }
3745 }
3746 }
3747
3748 bool MarkCompact::MapUpdatedLinearAllocPages(uint8_t* start_page,
3749 uint8_t* start_shadow_page,
3750 Atomic<PageState>* state,
3751 size_t length,
3752 bool free_pages,
3753 bool single_ioctl) {
3754 DCHECK(!minor_fault_initialized_);
3755 DCHECK_ALIGNED_PARAM(length, gPageSize);
3756 Atomic<PageState>* madv_state = state;
3757 size_t madv_len = length;
3758 uint8_t* madv_start = start_shadow_page;
3759 bool check_state_for_madv = false;
3760 uint8_t* end_page = start_page + length;
3761 while (start_page < end_page) {
3762 size_t map_len = 0;
3763 // Find a contiguous range of pages that we can map in single ioctl.
3764 for (Atomic<PageState>* cur_state = state;
3765 map_len < length && cur_state->load(std::memory_order_acquire) == PageState::kProcessed;
3766 map_len += gPageSize, cur_state++) {
3767 // No body.
3768 }
3769
3770 if (map_len == 0) {
3771 if (single_ioctl) {
3772 return state->load(std::memory_order_relaxed) == PageState::kProcessedAndMapped;
3773 }
3774 // Skip all the pages that this thread can't map.
3775 while (length > 0) {
3776 PageState s = state->load(std::memory_order_relaxed);
3777 if (s == PageState::kProcessed) {
3778 break;
3779 }
3780 // If we find any page which is being processed or mapped (only possible by mutators)
3781 // then we need to re-check the page-state and, if needed, wait for the state to change
3782 // to 'mapped', before the shadow pages are reclaimed.
3783 check_state_for_madv |= s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped;
3784 state++;
3785 length -= gPageSize;
3786 start_shadow_page += gPageSize;
3787 start_page += gPageSize;
3788 }
3789 } else {
3790 map_len = CopyIoctl(start_page,
3791 start_shadow_page,
3792 map_len,
3793 /*return_on_contention=*/false);
3794 DCHECK_NE(map_len, 0u);
3795 if (use_uffd_sigbus_) {
3796 // Declare that the pages are ready to be accessed. Store is sufficient
3797 // as any thread will be storing the same value.
3798 for (size_t l = 0; l < map_len; l += gPageSize, state++) {
3799 PageState s = state->load(std::memory_order_relaxed);
3800 DCHECK(s == PageState::kProcessed || s == PageState::kProcessedAndMapped)
3801 << "state:" << s;
3802 state->store(PageState::kProcessedAndMapped, std::memory_order_release);
3803 }
3804 } else {
3805 state += DivideByPageSize(map_len);
3806 }
3807 if (single_ioctl) {
3808 break;
3809 }
3810 start_page += map_len;
3811 start_shadow_page += map_len;
3812 length -= map_len;
3813 // state is already updated above.
3814 }
3815 }
3816 if (free_pages) {
3817 if (check_state_for_madv) {
3818 // Wait until all the pages are mapped before releasing them. This check is
3819 // needed only if some mutators were found to be concurrently mapping pages earlier.
3820 for (size_t l = 0; l < madv_len; l += gPageSize, madv_state++) {
3821 uint32_t backoff_count = 0;
3822 PageState s = madv_state->load(std::memory_order_relaxed);
3823 while (s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped) {
3824 BackOff(backoff_count++);
3825 s = madv_state->load(std::memory_order_relaxed);
3826 }
3827 }
3828 }
3829 ZeroAndReleaseMemory(madv_start, madv_len);
3830 }
3831 return true;
3832 }
3833
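// Illustrative sketch (not part of mark_compact.cc): the free_pages path above
// waits, with exponential backoff, for any in-flight mutator mappings before
// returning the shadow pages to the kernel. This is a simplified standalone
// version with hypothetical names; assumes <atomic>, <sched.h> and <sys/mman.h>
// are available, and waits for a single terminal state instead of the exact
// state-range check used above.
static void SketchBackOff(uint32_t count) {
  if (count <= 10) {
    // Spin briefly for the first few attempts.
    for (volatile uint32_t i = 0; i < (1u << count); i++) {}
  } else {
    sched_yield();
  }
}

static void SketchWaitAndRelease(std::atomic<uint8_t>* states,
                                 size_t num_pages,
                                 uint8_t terminal_state,
                                 void* shadow_start,
                                 size_t page_size) {
  for (size_t i = 0; i < num_pages; i++) {
    uint32_t backoff = 0;
    while (states[i].load(std::memory_order_acquire) != terminal_state) {
      SketchBackOff(backoff++);
    }
  }
  // Once nobody can be reading the shadow pages anymore, return them to the kernel.
  madvise(shadow_start, num_pages * page_size, MADV_DONTNEED);
}
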
3834 template <int kMode>
3835 void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault) {
3836 DCHECK(!is_minor_fault || kMode == kMinorFaultMode);
3837 auto arena_iter = linear_alloc_arenas_.end();
3838 {
3839 TrackedArena temp_arena(fault_page);
3840 arena_iter = linear_alloc_arenas_.upper_bound(&temp_arena);
3841 arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter)
3842 : linear_alloc_arenas_.end();
3843 }
3844 // Unlike ProcessLinearAlloc(), we don't need to hold arena-pool's lock here
3845 // because a thread trying to access the page (and, as a result, causing this
3846 // userfault) confirms that nobody can delete the corresponding arena and
3847 // release its pages.
3848 // NOTE: We may have some memory range recycled several times during a
3849 // compaction cycle, thereby potentially causing userfault on the same page
3850 // several times. That's not a problem as all of them (except for possibly the
3851 // first one) would require us mapping a zero-page, which we do without updating
3852 // the 'state_arr'.
3853 if (arena_iter == linear_alloc_arenas_.end() ||
3854 arena_iter->first->IsWaitingForDeletion() ||
3855 arena_iter->second <= fault_page) {
3856 // Fault page isn't in any of the arenas that existed before we started
3857 // compaction. So map zeropage and return.
3858 ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false);
3859 } else {
3860 // Find the linear-alloc space containing fault-page
3861 LinearAllocSpaceData* space_data = nullptr;
3862 for (auto& data : linear_alloc_spaces_data_) {
3863 if (data.begin_ <= fault_page && fault_page < data.end_) {
3864 space_data = &data;
3865 break;
3866 }
3867 }
3868 DCHECK_NE(space_data, nullptr);
3869 ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_;
3870 size_t page_idx = DivideByPageSize(fault_page - space_data->begin_);
3871 Atomic<PageState>* state_arr =
3872 reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
3873 PageState state = state_arr[page_idx].load(use_uffd_sigbus_ ? std::memory_order_acquire :
3874 std::memory_order_relaxed);
3875 uint32_t backoff_count = 0;
3876 while (true) {
3877 switch (state) {
3878 case PageState::kUnprocessed: {
3879 // Acquire order to ensure we don't start writing to shadow map, which is
3880 // shared, before the CAS is successful.
3881 if (state_arr[page_idx].compare_exchange_strong(
3882 state, PageState::kProcessing, std::memory_order_acquire)) {
3883 if (kMode == kCopyMode || is_minor_fault) {
3884 LinearAllocPageUpdater updater(this);
3885 uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page);
3886 // null first_obj indicates that it's a page from arena for
3887 // intern-table/class-table. So first object isn't required.
3888 if (first_obj != nullptr) {
3889 updater.MultiObjectArena(fault_page + diff, first_obj + diff);
3890 } else {
3891 updater.SingleObjectArena(fault_page + diff, gPageSize);
3892 }
3893 if (kMode == kCopyMode) {
3894 if (updater.WasLastPageTouched()) {
3895 state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
3896 state = PageState::kProcessed;
3897 continue;
3898 } else {
3899 // If the page wasn't touched, then it means it is empty and
3900 // is most likely not present on the shadow-side. Furthermore,
3901 // since the shadow is also userfaultfd registered, doing the copy
3902 // ioctl fails as the copy-from-user in the kernel will cause a
3903 // userfault. Instead, just map a zeropage, which is not only
3904 // correct but also efficient as it avoids unnecessary memcpy
3905 // in the kernel.
3906 ZeropageIoctl(
3907 fault_page, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
3908 state_arr[page_idx].store(PageState::kProcessedAndMapped,
3909 std::memory_order_release);
3910 return;
3911 }
3912 }
3913 } else {
3914 // Don't touch the page in this case (there is no reason to do so
3915 // anyways) as it would mean reading from first_obj, which could be on
3916 // another missing page and hence may cause this thread to block, leading
3917 // to deadlocks.
3918 // Force read the page if it is missing so that a zeropage gets mapped on
3919 // the shadow map and then CONTINUE ioctl will map it on linear-alloc.
3920 ForceRead(fault_page + diff);
3921 }
3922 MapProcessedPages</*kFirstPageMapping=*/true>(
3923 fault_page, state_arr, page_idx, space_data->page_status_map_.Size());
3924 return;
3925 }
3926 }
3927 continue;
3928 case PageState::kProcessed:
3929 // Map as many pages as possible in a single ioctl, without spending
3930 // time freeing pages.
3931 if (MapUpdatedLinearAllocPages(fault_page,
3932 fault_page + diff,
3933 state_arr + page_idx,
3934 space_data->end_ - fault_page,
3935 /*free_pages=*/false,
3936 /*single_ioctl=*/true)) {
3937 return;
3938 }
3939 // fault_page was not mapped by this thread (some other thread claimed
3940 // it). Wait for it to be mapped before returning.
3941 FALLTHROUGH_INTENDED;
3942 case PageState::kProcessing:
3943 case PageState::kProcessingAndMapping:
3944 case PageState::kProcessedAndMapping:
3945 if (use_uffd_sigbus_) {
3946 // Wait for the page to be mapped before returning.
3947 BackOff(backoff_count++);
3948 state = state_arr[page_idx].load(std::memory_order_acquire);
3949 continue;
3950 }
3951 return;
3952 case PageState::kMutatorProcessing:
3953 LOG(FATAL) << "Unreachable";
3954 UNREACHABLE();
3955 case PageState::kProcessedAndMapped:
3956 // Somebody else took care of the page.
3957 return;
3958 }
3959 break;
3960 }
3961
3962 DCHECK_EQ(kMode, kMinorFaultMode);
3963 DCHECK_EQ(state, PageState::kProcessed);
3964 if (!is_minor_fault) {
3965 // Force read the page if it is missing so that a zeropage gets mapped on
3966 // the shadow map and then CONTINUE ioctl will map it on linear-alloc.
3967 ForceRead(fault_page + diff);
3968 }
3969 MapProcessedPages</*kFirstPageMapping=*/false>(
3970 fault_page, state_arr, page_idx, space_data->page_status_map_.Size());
3971 }
3972 }
3973
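// Illustrative sketch (not part of mark_compact.cc): the claim/process/publish
// pattern used on the per-page state array above. The thread that wins the CAS
// owns the page; everyone else either maps already-processed pages or waits for
// the terminal state. All names here are hypothetical; assumes <atomic> and
// <functional> are available.
enum class SketchPageState : uint8_t { kUnprocessed, kProcessing, kProcessed, kMapped };

static void SketchHandlePage(std::atomic<SketchPageState>& state,
                             const std::function<void()>& process,
                             const std::function<void()>& map) {
  SketchPageState expected = SketchPageState::kUnprocessed;
  // Acquire on success so that reads/writes of the shared shadow page are not
  // reordered before ownership is established.
  if (state.compare_exchange_strong(
          expected, SketchPageState::kProcessing, std::memory_order_acquire)) {
    process();
    state.store(SketchPageState::kProcessed, std::memory_order_release);
    map();
    state.store(SketchPageState::kMapped, std::memory_order_release);
  } else {
    // Lost the race: wait for the winner to finish mapping. A real
    // implementation would back off instead of spinning tightly.
    while (state.load(std::memory_order_acquire) != SketchPageState::kMapped) {
    }
  }
}
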
3974 void MarkCompact::ProcessLinearAlloc() {
3975 GcVisitedArenaPool* arena_pool =
3976 static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
3977 DCHECK_EQ(thread_running_gc_, Thread::Current());
3978 uint8_t* unmapped_range_start = nullptr;
3979 uint8_t* unmapped_range_end = nullptr;
3980 // Pointer to the linear-alloc space containing the current arena in the loop
3981 // below. Also helps in ensuring that two arenas, which are contiguous in
3982 // address space but are from different linear-alloc spaces, are not coalesced
3983 // into one range for mapping purpose.
3984 LinearAllocSpaceData* space_data = nullptr;
3985 Atomic<PageState>* state_arr = nullptr;
3986 ptrdiff_t diff = 0;
3987
3988 auto map_pages = [&]() {
3989 DCHECK_NE(diff, 0);
3990 DCHECK_NE(space_data, nullptr);
3991 DCHECK_GE(unmapped_range_start, space_data->begin_);
3992 DCHECK_LT(unmapped_range_start, space_data->end_);
3993 DCHECK_GT(unmapped_range_end, space_data->begin_);
3994 DCHECK_LE(unmapped_range_end, space_data->end_);
3995 DCHECK_LT(unmapped_range_start, unmapped_range_end);
3996 DCHECK_ALIGNED_PARAM(unmapped_range_end - unmapped_range_start, gPageSize);
3997 size_t page_idx = DivideByPageSize(unmapped_range_start - space_data->begin_);
3998 MapUpdatedLinearAllocPages(unmapped_range_start,
3999 unmapped_range_start + diff,
4000 state_arr + page_idx,
4001 unmapped_range_end - unmapped_range_start,
4002 /*free_pages=*/true,
4003 /*single_ioctl=*/false);
4004 };
4005 for (auto& pair : linear_alloc_arenas_) {
4006 const TrackedArena* arena = pair.first;
4007 size_t arena_size = arena->Size();
4008 uint8_t* arena_begin = arena->Begin();
4009 // linear_alloc_arenas_ is sorted on arena-begin. So we will get all arenas
4010 // in that order.
4011 DCHECK_LE(unmapped_range_end, arena_begin);
4012 DCHECK(space_data == nullptr || arena_begin > space_data->begin_)
4013 << "space-begin:" << static_cast<void*>(space_data->begin_)
4014 << " arena-begin:" << static_cast<void*>(arena_begin);
4015 if (space_data == nullptr || space_data->end_ <= arena_begin) {
4016 // Map the processed arenas as we are switching to another space.
4017 if (space_data != nullptr && unmapped_range_end != nullptr) {
4018 map_pages();
4019 unmapped_range_end = nullptr;
4020 }
4021 // Find the linear-alloc space containing the arena
4022 LinearAllocSpaceData* curr_space_data = space_data;
4023 for (auto& data : linear_alloc_spaces_data_) {
4024 if (data.begin_ <= arena_begin && arena_begin < data.end_) {
4025 // Since arenas are sorted, the next space should be higher in address
4026 // order than the current one.
4027 DCHECK(space_data == nullptr || data.begin_ >= space_data->end_);
4028 diff = data.shadow_.Begin() - data.begin_;
4029 state_arr = reinterpret_cast<Atomic<PageState>*>(data.page_status_map_.Begin());
4030 space_data = &data;
4031 break;
4032 }
4033 }
4034 CHECK_NE(space_data, curr_space_data)
4035 << "Couldn't find space for arena-begin:" << static_cast<void*>(arena_begin);
4036 }
4037 // Map the processed arenas if we found a hole within the current space.
4038 if (unmapped_range_end != nullptr && unmapped_range_end < arena_begin) {
4039 map_pages();
4040 unmapped_range_end = nullptr;
4041 }
4042 if (unmapped_range_end == nullptr) {
4043 unmapped_range_start = unmapped_range_end = arena_begin;
4044 }
4045 DCHECK_NE(unmapped_range_start, nullptr);
4046 // It's ok to include all arenas in the unmapped range. Since the
4047 // corresponding state bytes will be kUnprocessed, we will skip calling
4048 // ioctl and madvise on arenas which are waiting to be deleted.
4049 unmapped_range_end += arena_size;
4050 {
4051 // Acquire arena-pool's lock (in shared-mode) so that the arena being updated
4052 // does not get deleted at the same time. If this critical section is too
4053 // long and impacts mutator response time, then we can get rid of this lock by
4054 // holding onto the memory ranges of all arenas deleted (since the compaction
4055 // pause) until compaction completes.
4056 ReaderMutexLock rmu(thread_running_gc_, arena_pool->GetLock());
4057 // If any arenas were freed since compaction pause then skip them from
4058 // visiting.
4059 if (arena->IsWaitingForDeletion()) {
4060 continue;
4061 }
4062 uint8_t* last_byte = pair.second;
4063 DCHECK_ALIGNED_PARAM(last_byte, gPageSize);
4064 auto visitor = [space_data, last_byte, diff, this, state_arr](
4065 uint8_t* page_begin,
4066 uint8_t* first_obj,
4067 size_t page_size) REQUIRES_SHARED(Locks::mutator_lock_) {
4068 // No need to process pages past last_byte as they already have updated
4069 // gc-roots, if any.
4070 if (page_begin >= last_byte) {
4071 return;
4072 }
4073 LinearAllocPageUpdater updater(this);
4074 size_t page_idx = DivideByPageSize(page_begin - space_data->begin_);
4075 DCHECK_LT(page_idx, space_data->page_status_map_.Size());
4076 PageState expected_state = PageState::kUnprocessed;
4077 // Acquire order to ensure that we don't start accessing the shadow page,
4078 // which is shared with other threads, prior to CAS. Also, for same
4079 // reason, we used 'release' order for changing the state to 'processed'.
4080 if (state_arr[page_idx].compare_exchange_strong(
4081 expected_state, PageState::kProcessing, std::memory_order_acquire)) {
4082 // null first_obj indicates that it's a page from arena for
4083 // intern-table/class-table. So first object isn't required.
4084 if (first_obj != nullptr) {
4085 updater.MultiObjectArena(page_begin + diff, first_obj + diff);
4086 } else {
4087 DCHECK_EQ(page_size, gPageSize);
4088 updater.SingleObjectArena(page_begin + diff, page_size);
4089 }
4090 expected_state = PageState::kProcessing;
4091 if (!minor_fault_initialized_) {
4092 // Store is sufficient as no other thread could be modifying it. Use
4093 // release order to ensure that the writes to shadow page are
4094 // committed to memory before.
4095 if (updater.WasLastPageTouched()) {
4096 state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
4097 } else {
4098 // See comment in ConcurrentlyProcessLinearAllocPage() with same situation.
4099 ZeropageIoctl(
4100 page_begin, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
4101 // Ioctl will act as release fence.
4102 state_arr[page_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
4103 }
4104 } else if (!state_arr[page_idx].compare_exchange_strong(
4105 expected_state, PageState::kProcessed, std::memory_order_release)) {
4106 DCHECK_EQ(expected_state, PageState::kProcessingAndMapping);
4107 // Force read in case the page was missing and updater didn't touch it
4108 // as there was nothing to do. This will ensure that a zeropage is
4109 // faulted on the shadow map.
4110 ForceRead(page_begin + diff);
4111 MapProcessedPages</*kFirstPageMapping=*/true>(
4112 page_begin, state_arr, page_idx, space_data->page_status_map_.Size());
4113 }
4114 }
4115 };
4116
4117 arena->VisitRoots(visitor);
4118 }
4119 }
4120 if (unmapped_range_end > unmapped_range_start) {
4121 // Map remaining pages.
4122 map_pages();
4123 }
4124 }
4125
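// Illustrative sketch (not part of mark_compact.cc): ProcessLinearAlloc() and
// MapUpdatedLinearAllocPages() above batch maximal runs of contiguous processed
// pages so that each run costs a single ioctl. A minimal version of that
// run-finding idea, with hypothetical 'Ready' and 'MapRun' callbacks; assumes
// <functional> is available.
static void SketchMapInRuns(const std::function<bool(size_t)>& Ready,
                            const std::function<void(size_t, size_t)>& MapRun,
                            size_t num_pages) {
  size_t i = 0;
  while (i < num_pages) {
    if (!Ready(i)) {
      i++;  // skip pages that are not ready or that another thread owns
      continue;
    }
    size_t run_begin = i;
    while (i < num_pages && Ready(i)) {
      i++;
    }
    MapRun(run_begin, i - run_begin);  // one batched operation for the whole run
  }
}
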
4126 void MarkCompact::RegisterUffd(void* addr, size_t size, int mode) {
4127 DCHECK(IsValidFd(uffd_));
4128 struct uffdio_register uffd_register;
4129 uffd_register.range.start = reinterpret_cast<uintptr_t>(addr);
4130 uffd_register.range.len = size;
4131 uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
4132 if (mode == kMinorFaultMode) {
4133 uffd_register.mode |= UFFDIO_REGISTER_MODE_MINOR;
4134 }
4135 CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
4136 << "ioctl_userfaultfd: register failed: " << strerror(errno)
4137 << ". start:" << static_cast<void*>(addr) << " len:" << PrettySize(size);
4138 }
4139
4140 // TODO: sometime we may want to tolerate certain error conditions (like ENOMEM
4141 // when we unregister the unused portion of the moving-space). Implement support
4142 // for that.
4143 void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) {
4144 DCHECK(IsValidFd(uffd_));
4145 struct uffdio_range range;
4146 range.start = reinterpret_cast<uintptr_t>(start);
4147 range.len = len;
4148 CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &range), 0)
4149 << "ioctl_userfaultfd: unregister failed: " << strerror(errno)
4150 << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
4151 // Due to an oversight in the kernel implementation of 'unregister', the
4152 // waiting threads are woken up only for copy uffds. Therefore, for now, we
4153 // have to explicitly wake up the threads in minor-fault case.
4154 // TODO: The fix in the kernel is being worked on. Once the kernel version
4155 // containing the fix is known, make it conditional on that as well.
4156 if (minor_fault_initialized_) {
4157 CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &range), 0)
4158 << "ioctl_userfaultfd: wake failed: " << strerror(errno)
4159 << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
4160 }
4161 }
4162
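// Illustrative sketch (not part of mark_compact.cc): the kernel API that
// RegisterUffd()/UnregisterUffd() above wrap, i.e. creating a userfaultfd and
// (un)registering a range for missing-page faults. Error handling is reduced to
// return codes and names are hypothetical. On newer kernels an unprivileged
// process may additionally need the UFFD_USER_MODE_ONLY flag.
static int SketchCreateUffd() {
  int uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
  if (uffd == -1) {
    return -1;
  }
  struct uffdio_api api;
  api.api = UFFD_API;
  api.features = 0;  // no optional features requested in this sketch
  if (ioctl(uffd, UFFDIO_API, &api) == -1) {
    close(uffd);
    return -1;
  }
  return uffd;
}

static bool SketchRegisterRange(int uffd, void* addr, size_t len, bool do_register) {
  if (do_register) {
    struct uffdio_register reg;
    reg.range.start = reinterpret_cast<uintptr_t>(addr);
    reg.range.len = len;
    reg.mode = UFFDIO_REGISTER_MODE_MISSING;
    return ioctl(uffd, UFFDIO_REGISTER, &reg) == 0;
  }
  struct uffdio_range range;
  range.start = reinterpret_cast<uintptr_t>(addr);
  range.len = len;
  return ioctl(uffd, UFFDIO_UNREGISTER, &range) == 0;
}
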
4163 void MarkCompact::CompactionPhase() {
4164 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4165 {
4166 int32_t freed_bytes = black_objs_slide_diff_;
4167 bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
4168 RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
4169 }
4170
4171 if (CanCompactMovingSpaceWithMinorFault()) {
4172 CompactMovingSpace<kMinorFaultMode>(/*page=*/nullptr);
4173 } else {
4174 CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin());
4175 }
4176
4177 ProcessLinearAlloc();
4178
4179 if (use_uffd_sigbus_) {
4180 // Set compaction-done bit so that no new mutator threads start compaction
4181 // process in the SIGBUS handler.
4182 SigbusCounterType count = sigbus_in_progress_count_.fetch_or(kSigbusCounterCompactionDoneMask,
4183 std::memory_order_acq_rel);
4184 // Wait for SIGBUS handlers already in play.
4185 for (uint32_t i = 0; count > 0; i++) {
4186 BackOff(i);
4187 count = sigbus_in_progress_count_.load(std::memory_order_acquire);
4188 count &= ~kSigbusCounterCompactionDoneMask;
4189 }
4190 } else {
4191 DCHECK(IsAlignedParam(conc_compaction_termination_page_, gPageSize));
4192 // We will only iterate once if gKernelHasFaultRetry is true.
4193 do {
4194 // madvise the page so that we can get userfaults on it.
4195 ZeroAndReleaseMemory(conc_compaction_termination_page_, gPageSize);
4196 // The following load triggers 'special' userfaults. When received by the
4197 // thread-pool workers, they will exit out of the compaction task. This fault
4198 // happens because we madvised the page.
4199 ForceRead(conc_compaction_termination_page_);
4200 } while (thread_pool_counter_ > 0);
4201 }
4202 // Unregister linear-alloc spaces
4203 for (auto& data : linear_alloc_spaces_data_) {
4204 DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size()));
4205 UnregisterUffd(data.begin_, data.shadow_.Size());
4206 // madvise linear-allocs's page-status array. Note that we don't need to
4207 // madvise the shadow-map as the pages from it were reclaimed in
4208 // ProcessLinearAlloc() after arenas were mapped.
4209 data.page_status_map_.MadviseDontNeedAndZero();
4210 if (minor_fault_initialized_) {
4211 DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_NONE), 0)
4212 << "mprotect failed: " << strerror(errno);
4213 }
4214 }
4215
4216 // Make sure no mutator is reading from the from-space before unregistering
4217 // userfaultfd from moving-space and then zapping from-space. The mutator
4218 // and GC may race to set a page state to processing or further along. The two
4219 // attempts are ordered. If the collector wins, then the mutator will see that
4220 // and not access the from-space page. If the mutator wins, then the
4221 // compaction_in_progress_count_ increment by the mutator happens-before the test
4222 // here, and we will not see a zero value until the mutator has completed.
4223 for (uint32_t i = 0; compaction_in_progress_count_.load(std::memory_order_acquire) > 0; i++) {
4224 BackOff(i);
4225 }
4226 size_t moving_space_size = bump_pointer_space_->Capacity();
4227 size_t used_size = (moving_first_objs_count_ + black_page_count_) * gPageSize;
4228 if (used_size > 0) {
4229 UnregisterUffd(bump_pointer_space_->Begin(), used_size);
4230 }
4231 // Release all of the memory taken by moving-space's from-map
4232 if (minor_fault_initialized_) {
4233 if (IsValidFd(moving_from_space_fd_)) {
4234 // A strange behavior is observed wherein between GC cycles the from-space's
4235 // first page is accessed. But the memfd that is mapped on from-space is
4236 // used for to-space in the next GC cycle, causing issues with userfaultfd as the
4237 // page isn't missing. A possible reason for this could be prefetches. The
4238 // mprotect ensures that such accesses don't succeed.
4239 int ret = mprotect(from_space_begin_, moving_space_size, PROT_NONE);
4240 CHECK_EQ(ret, 0) << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
4241 // madvise(MADV_REMOVE) needs PROT_WRITE. Use fallocate() instead, which
4242 // does the same thing.
4243 ret = fallocate(moving_from_space_fd_,
4244 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
4245 /*offset=*/0,
4246 moving_space_size);
4247 CHECK_EQ(ret, 0) << "fallocate for from-space failed: " << strerror(errno);
4248 } else {
4249 // We don't have a valid fd, so use madvise(MADV_REMOVE) instead. mprotect
4250 // is not required in this case as we create fresh
4251 // MAP_SHARED+MAP_ANONYMOUS mapping in each GC cycle.
4252 int ret = madvise(from_space_begin_, moving_space_size, MADV_REMOVE);
4253 CHECK_EQ(ret, 0) << "madvise(MADV_REMOVE) failed for from-space map:" << strerror(errno);
4254 }
4255 } else {
4256 from_space_map_.MadviseDontNeedAndZero();
4257 }
4258 // mprotect(PROT_NONE) all maps except to-space in debug-mode to catch any unexpected accesses.
4259 if (shadow_to_space_map_.IsValid()) {
4260 DCHECK_EQ(mprotect(shadow_to_space_map_.Begin(), shadow_to_space_map_.Size(), PROT_NONE), 0)
4261 << "mprotect(PROT_NONE) for shadow-map failed:" << strerror(errno);
4262 }
4263 if (!IsValidFd(moving_from_space_fd_)) {
4264 // The other case is already mprotected above.
4265 DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_NONE), 0)
4266 << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
4267 }
4268
4269 if (!use_uffd_sigbus_) {
4270 heap_->GetThreadPool()->StopWorkers(thread_running_gc_);
4271 }
4272 }
4273
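// Illustrative sketch (not part of mark_compact.cc): releasing the physical
// pages of a memfd-backed, MAP_SHARED from-space. As the comments above note,
// madvise(MADV_REMOVE) needs PROT_WRITE, so the range is first mprotected to
// PROT_NONE (also blocking stray prefetch-style accesses) and then a hole is
// punched in the backing file instead. Names are hypothetical; assumes
// <sys/mman.h>, <fcntl.h> and <linux/falloc.h> are available, as in this file.
static bool SketchReleaseSharedMapping(int memfd, void* begin, size_t size) {
  if (mprotect(begin, size, PROT_NONE) != 0) {
    return false;
  }
  return fallocate(memfd,
                   FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                   /*offset=*/0,
                   size) == 0;
}
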
4274 template <size_t kBufferSize>
4275 class MarkCompact::ThreadRootsVisitor : public RootVisitor {
4276 public:
4277 explicit ThreadRootsVisitor(MarkCompact* mark_compact, Thread* const self)
4278 : mark_compact_(mark_compact), self_(self) {}
4279
4280 ~ThreadRootsVisitor() {
4281 Flush();
4282 }
4283
4284 void VisitRoots(mirror::Object*** roots,
4285 size_t count,
4286 [[maybe_unused]] const RootInfo& info) override
4287 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
4288 for (size_t i = 0; i < count; i++) {
4289 mirror::Object* obj = *roots[i];
4290 if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
4291 Push(obj);
4292 }
4293 }
4294 }
4295
4296 void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
4297 size_t count,
4298 [[maybe_unused]] const RootInfo& info) override
4299 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
4300 for (size_t i = 0; i < count; i++) {
4301 mirror::Object* obj = roots[i]->AsMirrorPtr();
4302 if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
4303 Push(obj);
4304 }
4305 }
4306 }
4307
4308 private:
4309 void Flush() REQUIRES_SHARED(Locks::mutator_lock_)
4310 REQUIRES(Locks::heap_bitmap_lock_) {
4311 StackReference<mirror::Object>* start;
4312 StackReference<mirror::Object>* end;
4313 {
4314 MutexLock mu(self_, mark_compact_->lock_);
4315 // Loop here because even after expanding once it may not be sufficient to
4316 // accommodate all references. It's almost impossible, but there is no harm
4317 // in implementing it this way.
4318 while (!mark_compact_->mark_stack_->BumpBack(idx_, &start, &end)) {
4319 mark_compact_->ExpandMarkStack();
4320 }
4321 }
4322 while (idx_ > 0) {
4323 *start++ = roots_[--idx_];
4324 }
4325 DCHECK_EQ(start, end);
4326 }
4327
4328 void Push(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
4329 REQUIRES(Locks::heap_bitmap_lock_) {
4330 if (UNLIKELY(idx_ >= kBufferSize)) {
4331 Flush();
4332 }
4333 roots_[idx_++].Assign(obj);
4334 }
4335
4336 StackReference<mirror::Object> roots_[kBufferSize];
4337 size_t idx_ = 0;
4338 MarkCompact* const mark_compact_;
4339 Thread* const self_;
4340 };
4341
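// Illustrative sketch (not part of mark_compact.cc): the buffering pattern used
// by ThreadRootsVisitor above. Newly marked objects are collected in a small
// fixed-size, per-visitor array and flushed to the shared mark stack under a
// lock only when the buffer fills up (and once more on destruction), so the
// lock is taken rarely. All names are hypothetical; assumes <mutex> and
// <vector> are available.
template <size_t kCapacity>
class SketchBufferedPusher {
 public:
  SketchBufferedPusher(std::mutex& lock, std::vector<void*>& shared_stack)
      : lock_(lock), shared_stack_(shared_stack) {}

  ~SketchBufferedPusher() { Flush(); }

  void Push(void* obj) {
    if (idx_ == kCapacity) {
      Flush();
    }
    buffer_[idx_++] = obj;
  }

 private:
  void Flush() {
    std::lock_guard<std::mutex> guard(lock_);
    for (size_t i = 0; i < idx_; i++) {
      shared_stack_.push_back(buffer_[i]);
    }
    idx_ = 0;
  }

  void* buffer_[kCapacity];
  size_t idx_ = 0;
  std::mutex& lock_;
  std::vector<void*>& shared_stack_;
};
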
4342 class MarkCompact::CheckpointMarkThreadRoots : public Closure {
4343 public:
4344 explicit CheckpointMarkThreadRoots(MarkCompact* mark_compact) : mark_compact_(mark_compact) {}
4345
4346 void Run(Thread* thread) override NO_THREAD_SAFETY_ANALYSIS {
4347 ScopedTrace trace("Marking thread roots");
4348 // Note: self is not necessarily equal to thread since thread may be
4349 // suspended.
4350 Thread* const self = Thread::Current();
4351 CHECK(thread == self
4352 || thread->IsSuspended()
4353 || thread->GetState() == ThreadState::kWaitingPerformingGc)
4354 << thread->GetState() << " thread " << thread << " self " << self;
4355 {
4356 ThreadRootsVisitor</*kBufferSize*/ 20> visitor(mark_compact_, self);
4357 thread->VisitRoots(&visitor, kVisitRootFlagAllRoots);
4358 }
4359 // Clear page-buffer to prepare for compaction phase.
4360 thread->SetThreadLocalGcBuffer(nullptr);
4361
4362 // If thread is a running mutator, then act on behalf of the garbage
4363 // collector. See the code in ThreadList::RunCheckpoint.
4364 mark_compact_->GetBarrier().Pass(self);
4365 }
4366
4367 private:
4368 MarkCompact* const mark_compact_;
4369 };
4370
4371 void MarkCompact::MarkRootsCheckpoint(Thread* self, Runtime* runtime) {
4372 // We revoke TLABs later during the paused round of marking.
4373 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4374 CheckpointMarkThreadRoots check_point(this);
4375 ThreadList* thread_list = runtime->GetThreadList();
4376 gc_barrier_.Init(self, 0);
4377 // Request that the checkpoint be run on all threads, returning a count of the
4378 // threads that must run through the barrier, including self.
4379 size_t barrier_count = thread_list->RunCheckpoint(&check_point);
4380 // Release locks then wait for all mutator threads to pass the barrier.
4381 // If there are no threads to wait for, which implies that all the checkpoint
4382 // functions have finished, then there is no need to release the locks.
4383 if (barrier_count == 0) {
4384 return;
4385 }
4386 Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
4387 Locks::mutator_lock_->SharedUnlock(self);
4388 {
4389 ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
4390 gc_barrier_.Increment(self, barrier_count);
4391 }
4392 Locks::mutator_lock_->SharedLock(self);
4393 Locks::heap_bitmap_lock_->ExclusiveLock(self);
4394 }
4395
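// Illustrative sketch (not part of mark_compact.cc, and not ART's Barrier
// class): the handshake in MarkRootsCheckpoint() above. The GC thread requests
// N checkpoints, each thread "passes" the barrier after running its checkpoint
// work, and the GC thread blocks until the count reaches N. A minimal
// mutex/condition-variable analogue with hypothetical names; assumes <mutex>
// and <condition_variable> are available.
class SketchCheckpointBarrier {
 public:
  void Init(size_t expected) {
    std::lock_guard<std::mutex> guard(lock_);
    expected_ = expected;
    passed_ = 0;
  }

  // Called by each thread after running its checkpoint work.
  void Pass() {
    std::lock_guard<std::mutex> guard(lock_);
    if (++passed_ == expected_) {
      cv_.notify_all();
    }
  }

  // Called by the GC thread; returns once every requested checkpoint has run.
  void Wait() {
    std::unique_lock<std::mutex> guard(lock_);
    cv_.wait(guard, [this] { return passed_ >= expected_; });
  }

 private:
  std::mutex lock_;
  std::condition_variable cv_;
  size_t expected_ = 0;
  size_t passed_ = 0;
};
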
4396 void MarkCompact::MarkNonThreadRoots(Runtime* runtime) {
4397 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4398 runtime->VisitNonThreadRoots(this);
4399 }
4400
4401 void MarkCompact::MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime) {
4402 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4403 runtime->VisitConcurrentRoots(this, flags);
4404 }
4405
4406 void MarkCompact::RevokeAllThreadLocalBuffers() {
4407 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4408 bump_pointer_space_->RevokeAllThreadLocalBuffers();
4409 }
4410
4411 class MarkCompact::ScanObjectVisitor {
4412 public:
4413 explicit ScanObjectVisitor(MarkCompact* const mark_compact) ALWAYS_INLINE
4414 : mark_compact_(mark_compact) {}
4415
4416 void operator()(ObjPtr<mirror::Object> obj) const
4417 ALWAYS_INLINE
4418 REQUIRES(Locks::heap_bitmap_lock_)
4419 REQUIRES_SHARED(Locks::mutator_lock_) {
4420 mark_compact_->ScanObject</*kUpdateLiveWords*/ false>(obj.Ptr());
4421 }
4422
4423 private:
4424 MarkCompact* const mark_compact_;
4425 };
4426
4427 void MarkCompact::UpdateAndMarkModUnion() {
4428 accounting::CardTable* const card_table = heap_->GetCardTable();
4429 for (const auto& space : immune_spaces_.GetSpaces()) {
4430 const char* name = space->IsZygoteSpace()
4431 ? "UpdateAndMarkZygoteModUnionTable"
4432 : "UpdateAndMarkImageModUnionTable";
4433 DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
4434 TimingLogger::ScopedTiming t(name, GetTimings());
4435 accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
4436 if (table != nullptr) {
4437 // UpdateAndMarkReferences() doesn't visit Reference-type objects. But
4438 // that's fine because these objects are immutable enough (referent can
4439 // only be cleared) and hence the only referents they can have are intra-space.
4440 table->UpdateAndMarkReferences(this);
4441 } else {
4442 // No mod-union table, scan all dirty/aged cards in the corresponding
4443 // card-table. This can only occur for app images.
4444 card_table->Scan</*kClearCard*/ false>(space->GetMarkBitmap(),
4445 space->Begin(),
4446 space->End(),
4447 ScanObjectVisitor(this),
4448 gc::accounting::CardTable::kCardAged);
4449 }
4450 }
4451 }
4452
4453 void MarkCompact::MarkReachableObjects() {
4454 UpdateAndMarkModUnion();
4455 // Recursively mark all the non-image bits set in the mark bitmap.
4456 ProcessMarkStack();
4457 }
4458
4459 void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) {
4460 accounting::CardTable* card_table = heap_->GetCardTable();
4461 for (const auto& space : heap_->GetContinuousSpaces()) {
4462 const char* name = nullptr;
4463 switch (space->GetGcRetentionPolicy()) {
4464 case space::kGcRetentionPolicyNeverCollect:
4465 name = paused ? "(Paused)ScanGrayImmuneSpaceObjects" : "ScanGrayImmuneSpaceObjects";
4466 break;
4467 case space::kGcRetentionPolicyFullCollect:
4468 name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects";
4469 break;
4470 case space::kGcRetentionPolicyAlwaysCollect:
4471 name = paused ? "(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects";
4472 break;
4473 }
4474 TimingLogger::ScopedTiming t(name, GetTimings());
4475 card_table->Scan</*kClearCard*/ false>(
4476 space->GetMarkBitmap(), space->Begin(), space->End(), ScanObjectVisitor(this), minimum_age);
4477 }
4478 }
4479
4480 void MarkCompact::RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) {
4481 ScanDirtyObjects(paused, minimum_age);
4482 ProcessMarkStack();
4483 }
4484
4485 void MarkCompact::MarkRoots(VisitRootFlags flags) {
4486 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4487 Runtime* runtime = Runtime::Current();
4488 // Make sure that the checkpoint which collects the stack roots is the first
4489 // one capturing GC-roots. As this one is supposed to find the address,
4490 // everything allocated after that (during this marking phase) will be
4491 // considered 'marked'.
4492 MarkRootsCheckpoint(thread_running_gc_, runtime);
4493 MarkNonThreadRoots(runtime);
4494 MarkConcurrentRoots(flags, runtime);
4495 }
4496
4497 void MarkCompact::PreCleanCards() {
4498 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4499 CHECK(!Locks::mutator_lock_->IsExclusiveHeld(thread_running_gc_));
4500 // Age the card-table before thread stack scanning checkpoint in MarkRoots()
4501 // as it ensures that there are no in-progress write barriers which started
4502 // prior to aging the card-table.
4503 PrepareCardTableForMarking(/*clear_alloc_space_cards*/ false);
4504 MarkRoots(static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots));
4505 RecursiveMarkDirtyObjects(/*paused*/ false, accounting::CardTable::kCardDirty - 1);
4506 }
4507
4508 // In a concurrent marking algorithm, if we are not using a write/read barrier, as
4509 // in this case, then we need a stop-the-world (STW) round in the end to mark
4510 // objects which were written into concurrently while concurrent marking was
4511 // performed.
4512 // In order to minimize the pause time, we could take one of the two approaches:
4513 // 1. Keep repeating concurrent marking of dirty cards until the time spent goes
4514 // below a threshold.
4515 // 2. Do two rounds concurrently and then attempt a paused one. If we figure
4516 // that it's taking too long, then resume mutators and retry.
4517 //
4518 // Given the non-trivial fixed overhead of running a round (card table and root
4519 // scan), it might be better to go with approach 2.
4520 void MarkCompact::MarkingPhase() {
4521 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4522 DCHECK_EQ(thread_running_gc_, Thread::Current());
4523 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
4524 MaybeClampGcStructures();
4525 PrepareCardTableForMarking(/*clear_alloc_space_cards*/ true);
4526 MarkZygoteLargeObjects();
4527 MarkRoots(
4528 static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots));
4529 MarkReachableObjects();
4530 // Pre-clean dirtied cards to reduce pauses.
4531 PreCleanCards();
4532
4533 // Setup reference processing and forward soft references once before enabling
4534 // slow path (in MarkingPause)
4535 ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor();
4536 bool clear_soft_references = GetCurrentIteration()->GetClearSoftReferences();
4537 rp->Setup(thread_running_gc_, this, /*concurrent=*/ true, clear_soft_references);
4538 if (!clear_soft_references) {
4539 // Forward as many SoftReferences as possible before inhibiting reference access.
4540 rp->ForwardSoftReferences(GetTimings());
4541 }
4542 }
4543
4544 class MarkCompact::RefFieldsVisitor {
4545 public:
4546 ALWAYS_INLINE explicit RefFieldsVisitor(MarkCompact* const mark_compact)
4547 : mark_compact_(mark_compact) {}
4548
4549 ALWAYS_INLINE void operator()(mirror::Object* obj,
4550 MemberOffset offset,
4551 [[maybe_unused]] bool is_static) const
4552 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4553 if (kCheckLocks) {
4554 Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
4555 Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
4556 }
4557 mark_compact_->MarkObject(obj->GetFieldObject<mirror::Object>(offset), obj, offset);
4558 }
4559
4560 void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const ALWAYS_INLINE
4561 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4562 mark_compact_->DelayReferenceReferent(klass, ref);
4563 }
4564
4565 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
4566 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4567 if (!root->IsNull()) {
4568 VisitRoot(root);
4569 }
4570 }
4571
4572 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
4573 REQUIRES(Locks::heap_bitmap_lock_)
4574 REQUIRES_SHARED(Locks::mutator_lock_) {
4575 if (kCheckLocks) {
4576 Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
4577 Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
4578 }
4579 mark_compact_->MarkObject(root->AsMirrorPtr());
4580 }
4581
4582 private:
4583 MarkCompact* const mark_compact_;
4584 };
4585
4586 template <size_t kAlignment>
4587 size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t chunk_idx) const {
4588 const size_t index = chunk_idx * kBitmapWordsPerVectorWord;
4589 size_t words = 0;
4590 for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) {
4591 words += POPCOUNT(Bitmap::Begin()[index + i]);
4592 }
4593 return words * kAlignment;
4594 }
4595
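// Illustrative sketch (not part of mark_compact.cc): counting live bytes from
// bitmap words with popcount, as LiveBytesInBitmapWord() does above with ART's
// POPCOUNT macro. Each set bit stands for 'alignment' bytes of live data.
// Names are hypothetical; uses the GCC/Clang __builtin_popcountll builtin.
static size_t SketchLiveBytes(const uint64_t* words, size_t num_words, size_t alignment) {
  size_t bits = 0;
  for (size_t i = 0; i < num_words; i++) {
    bits += static_cast<size_t>(__builtin_popcountll(words[i]));
  }
  return bits * alignment;
}
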
4596 void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) {
4597 DCHECK(obj != nullptr);
4598 DCHECK_EQ(obj_size, obj->SizeOf<kDefaultVerifyFlags>());
4599 uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj);
4600 UpdateClassAfterObjectMap(obj);
4601 size_t size = RoundUp(obj_size, kAlignment);
4602 uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size);
4603 size_t chunk_idx = (obj_begin - live_words_bitmap_->Begin()) / kOffsetChunkSize;
4604 // Compute the bit-index within the chunk-info vector word.
4605 bit_index %= kBitsPerVectorWord;
4606 size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment);
4607
4608 chunk_info_vec_[chunk_idx++] += first_chunk_portion;
4609 DCHECK_LE(first_chunk_portion, size);
4610 for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) {
4611 DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u);
4612 chunk_info_vec_[chunk_idx++] = kOffsetChunkSize;
4613 }
4614 chunk_info_vec_[chunk_idx] += size;
4615 freed_objects_--;
4616 }
4617
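// Illustrative sketch (not part of mark_compact.cc): distributing an object's
// rounded-up size over a per-chunk liveness vector, mirroring the arithmetic in
// UpdateLivenessInfo() above: top up the chunk the object starts in, credit the
// fully covered chunks, then add the tail. Names are hypothetical; assumes
// <vector> and <algorithm> are available and that the vector is large enough
// for the final chunk touched.
static void SketchAddLiveBytes(std::vector<uint32_t>& chunk_live_bytes,
                               size_t chunk_size,
                               size_t offset_in_space,  // byte offset of the object
                               size_t size) {           // already-aligned object size
  size_t chunk_idx = offset_in_space / chunk_size;
  size_t first_portion = std::min(size, chunk_size - offset_in_space % chunk_size);
  chunk_live_bytes[chunk_idx++] += first_portion;
  for (size -= first_portion; size > chunk_size; size -= chunk_size) {
    chunk_live_bytes[chunk_idx++] = chunk_size;  // chunk fully covered by the object
  }
  chunk_live_bytes[chunk_idx] += size;  // remainder lands in the last chunk
}
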
4618 template <bool kUpdateLiveWords>
4619 void MarkCompact::ScanObject(mirror::Object* obj) {
4620 // The size of `obj` is used both here (to update `bytes_scanned_`) and in
4621 // `UpdateLivenessInfo`. As fetching this value can be expensive, do it once
4622 // here and pass that information to `UpdateLivenessInfo`.
4623 size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
4624 bytes_scanned_ += obj_size;
4625
4626 RefFieldsVisitor visitor(this);
4627 DCHECK(IsMarked(obj)) << "Scanning marked object " << obj << "\n" << heap_->DumpSpaces();
4628 if (kUpdateLiveWords && HasAddress(obj)) {
4629 UpdateLivenessInfo(obj, obj_size);
4630 }
4631 obj->VisitReferences(visitor, visitor);
4632 }
4633
4634 // Scan anything that's on the mark stack.
4635 void MarkCompact::ProcessMarkStack() {
4636 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4637 // TODO: try prefetch like in CMS
4638 while (!mark_stack_->IsEmpty()) {
4639 mirror::Object* obj = mark_stack_->PopBack();
4640 DCHECK(obj != nullptr);
4641 ScanObject</*kUpdateLiveWords*/ true>(obj);
4642 }
4643 }
4644
4645 void MarkCompact::ExpandMarkStack() {
4646 const size_t new_size = mark_stack_->Capacity() * 2;
4647 std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(),
4648 mark_stack_->End());
4649 mark_stack_->Resize(new_size);
4650 for (auto& ref : temp) {
4651 mark_stack_->PushBack(ref.AsMirrorPtr());
4652 }
4653 DCHECK(!mark_stack_->IsFull());
4654 }
4655
4656 inline void MarkCompact::PushOnMarkStack(mirror::Object* obj) {
4657 if (UNLIKELY(mark_stack_->IsFull())) {
4658 ExpandMarkStack();
4659 }
4660 mark_stack_->PushBack(obj);
4661 }
4662
4663 inline void MarkCompact::MarkObjectNonNull(mirror::Object* obj,
4664 mirror::Object* holder,
4665 MemberOffset offset) {
4666 DCHECK(obj != nullptr);
4667 if (MarkObjectNonNullNoPush</*kParallel*/false>(obj, holder, offset)) {
4668 PushOnMarkStack(obj);
4669 }
4670 }
4671
4672 template <bool kParallel>
4673 inline bool MarkCompact::MarkObjectNonNullNoPush(mirror::Object* obj,
4674 mirror::Object* holder,
4675 MemberOffset offset) {
4676 // We expect most of the references to be in bump-pointer space, so try that
4677 // first to keep the cost of this function minimal.
4678 if (LIKELY(HasAddress(obj))) {
4679 return kParallel ? !moving_space_bitmap_->AtomicTestAndSet(obj)
4680 : !moving_space_bitmap_->Set(obj);
4681 } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4682 return kParallel ? !non_moving_space_bitmap_->AtomicTestAndSet(obj)
4683 : !non_moving_space_bitmap_->Set(obj);
4684 } else if (immune_spaces_.ContainsObject(obj)) {
4685 DCHECK(IsMarked(obj) != nullptr);
4686 return false;
4687 } else {
4688 // Must be a large-object space, otherwise it's a case of heap corruption.
4689 if (!IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment())) {
4690 // Objects in large-object space are aligned to the large-object alignment.
4691 // So if we have an object which doesn't belong to any space and is not
4692 // page-aligned as well, then it's memory corruption.
4693 // TODO: implement protect/unprotect in bump-pointer space.
4694 heap_->GetVerification()->LogHeapCorruption(holder, offset, obj, /*fatal*/ true);
4695 }
4696 DCHECK_NE(heap_->GetLargeObjectsSpace(), nullptr)
4697 << "ref=" << obj
4698 << " doesn't belong to any of the spaces and large object space doesn't exist";
4699 accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4700 DCHECK(los_bitmap->HasAddress(obj));
4701 if (kParallel) {
4702 los_bitmap->AtomicTestAndSet(obj);
4703 } else {
4704 los_bitmap->Set(obj);
4705 }
4706 // Large-object space only contains strings and primitive arrays, so there is
4707 // no reason to push onto the mark-stack.
4708 DCHECK(obj->IsString() || (obj->IsArrayInstance() && !obj->IsObjectArray()));
4709 return false;
4710 }
4711 }
4712
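// Illustrative sketch (not part of mark_compact.cc): the atomic test-and-set
// used for parallel marking above. fetch_or returns the previous word, so the
// caller learns whether it was the thread that set the bit and therefore must
// push the object. Names are hypothetical; assumes <atomic> is available.
static bool SketchAtomicTestAndSet(std::atomic<uintptr_t>* words, size_t bit_index) {
  const size_t kBitsPerWord = sizeof(uintptr_t) * 8;
  std::atomic<uintptr_t>& word = words[bit_index / kBitsPerWord];
  uintptr_t mask = static_cast<uintptr_t>(1) << (bit_index % kBitsPerWord);
  uintptr_t old_word = word.fetch_or(mask, std::memory_order_relaxed);
  return (old_word & mask) != 0;  // true if the bit was already set
}
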
4713 inline void MarkCompact::MarkObject(mirror::Object* obj,
4714 mirror::Object* holder,
4715 MemberOffset offset) {
4716 if (obj != nullptr) {
4717 MarkObjectNonNull(obj, holder, offset);
4718 }
4719 }
4720
4721 mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) {
4722 MarkObject(obj, nullptr, MemberOffset(0));
4723 return obj;
4724 }
4725
4726 void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
4727 [[maybe_unused]] bool do_atomic_update) {
4728 MarkObject(obj->AsMirrorPtr(), nullptr, MemberOffset(0));
4729 }
4730
4731 void MarkCompact::VisitRoots(mirror::Object*** roots,
4732 size_t count,
4733 const RootInfo& info) {
4734 if (compacting_) {
4735 uint8_t* moving_space_begin = moving_space_begin_;
4736 uint8_t* moving_space_end = moving_space_end_;
4737 for (size_t i = 0; i < count; ++i) {
4738 UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
4739 }
4740 } else {
4741 for (size_t i = 0; i < count; ++i) {
4742 MarkObjectNonNull(*roots[i]);
4743 }
4744 }
4745 }
4746
4747 void MarkCompact::VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
4748 size_t count,
4749 const RootInfo& info) {
4750 // TODO: do we need to check if the root is null or not?
4751 if (compacting_) {
4752 uint8_t* moving_space_begin = moving_space_begin_;
4753 uint8_t* moving_space_end = moving_space_end_;
4754 for (size_t i = 0; i < count; ++i) {
4755 UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
4756 }
4757 } else {
4758 for (size_t i = 0; i < count; ++i) {
4759 MarkObjectNonNull(roots[i]->AsMirrorPtr());
4760 }
4761 }
4762 }
4763
4764 mirror::Object* MarkCompact::IsMarked(mirror::Object* obj) {
4765 if (HasAddress(obj)) {
4766 const bool is_black = reinterpret_cast<uint8_t*>(obj) >= black_allocations_begin_;
4767 if (compacting_) {
4768 if (is_black) {
4769 return PostCompactBlackObjAddr(obj);
4770 } else if (live_words_bitmap_->Test(obj)) {
4771 return PostCompactOldObjAddr(obj);
4772 } else {
4773 return nullptr;
4774 }
4775 }
4776 return (is_black || moving_space_bitmap_->Test(obj)) ? obj : nullptr;
4777 } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4778 return non_moving_space_bitmap_->Test(obj) ? obj : nullptr;
4779 } else if (immune_spaces_.ContainsObject(obj)) {
4780 return obj;
4781 } else {
4782 DCHECK(heap_->GetLargeObjectsSpace())
4783 << "ref=" << obj
4784 << " doesn't belong to any of the spaces and large object space doesn't exist";
4785 accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4786 if (los_bitmap->HasAddress(obj)) {
4787 DCHECK(IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment()));
4788 return los_bitmap->Test(obj) ? obj : nullptr;
4789 } else {
4790 // The given obj is not in any of the known spaces, so return null. This could
4791 // happen for instance in interpreter caches wherein a concurrent update
4792 // to the cache could result in obj being a non-reference. This is
4793 // tolerable because SweepInterpreterCaches only updates if the given
4794 // object has moved, which can't be the case for the non-reference.
4795 return nullptr;
4796 }
4797 }
4798 }
4799
4800 bool MarkCompact::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
4801 [[maybe_unused]] bool do_atomic_update) {
4802 mirror::Object* ref = obj->AsMirrorPtr();
4803 if (ref == nullptr) {
4804 return true;
4805 }
4806 return IsMarked(ref);
4807 }
4808
4809 // Process the 'referent' field in a java.lang.ref.Reference. If the referent
4810 // has not yet been marked, put it on the appropriate list in the heap for later
4811 // processing.
4812 void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
4813 ObjPtr<mirror::Reference> ref) {
4814 heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this);
4815 }
4816
4817 void MarkCompact::FinishPhase() {
4818 GetCurrentIteration()->SetScannedBytes(bytes_scanned_);
4819 bool is_zygote = Runtime::Current()->IsZygote();
4820 compacting_ = false;
4821 minor_fault_initialized_ = !is_zygote && uffd_minor_fault_supported_;
4822 // Madvise compaction buffers. When using threaded implementation, skip the first page,
4823 // which is used by the gc-thread for the next iteration. Otherwise, we get into a
4824 // deadlock due to userfault on it in the next iteration. This page is not consuming any
4825 // physical memory because we already madvised it above and then we triggered a read
4826 // userfault, which maps a special zero-page.
4827 if (use_uffd_sigbus_ || !minor_fault_initialized_ || !shadow_to_space_map_.IsValid() ||
4828 shadow_to_space_map_.Size() < (moving_first_objs_count_ + black_page_count_) * gPageSize) {
4829 size_t adjustment = use_uffd_sigbus_ ? 0 : gPageSize;
4830 ZeroAndReleaseMemory(compaction_buffers_map_.Begin() + adjustment,
4831 compaction_buffers_map_.Size() - adjustment);
4832 } else if (shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) {
4833 // Now that we are going to use minor-faults from next GC cycle, we can
4834 // unmap the buffers used by worker threads.
4835 compaction_buffers_map_.SetSize(gPageSize);
4836 }
4837 info_map_.MadviseDontNeedAndZero();
4838 live_words_bitmap_->ClearBitmap();
4839 // TODO: We can clear this bitmap right before compaction pause. But in that
4840 // case we need to ensure that we don't assert on this bitmap afterwards.
4841 // Also, we would still need to clear it here again as we may have to use the
4842 // bitmap for black-allocations (see UpdateMovingSpaceBlackAllocations()).
4843 moving_space_bitmap_->Clear();
4844
4845 if (UNLIKELY(is_zygote && IsValidFd(uffd_))) {
4846 heap_->DeleteThreadPool();
4847 // This unregisters all ranges as a side-effect.
4848 close(uffd_);
4849 uffd_ = kFdUnused;
4850 uffd_initialized_ = false;
4851 }
4852 CHECK(mark_stack_->IsEmpty()); // Ensure that the mark stack is empty.
4853 mark_stack_->Reset();
4854 DCHECK_EQ(thread_running_gc_, Thread::Current());
4855 if (kIsDebugBuild) {
4856 MutexLock mu(thread_running_gc_, lock_);
4857 if (updated_roots_.get() != nullptr) {
4858 updated_roots_->clear();
4859 }
4860 }
4861 class_after_obj_ordered_map_.clear();
4862 linear_alloc_arenas_.clear();
4863 {
4864 ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
4865 WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
4866 heap_->ClearMarkedObjects();
4867 }
4868 std::swap(moving_to_space_fd_, moving_from_space_fd_);
4869 if (IsValidFd(moving_to_space_fd_)) {
4870 // Confirm that the memfd to be used for to-space in the next GC cycle is empty.
4871 struct stat buf;
4872 DCHECK_EQ(fstat(moving_to_space_fd_, &buf), 0) << "fstat failed: " << strerror(errno);
4873 DCHECK_EQ(buf.st_blocks, 0u);
4874 }
4875 GcVisitedArenaPool* arena_pool =
4876 static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
4877 arena_pool->DeleteUnusedArenas();
4878 }
4879
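// Illustrative sketch (not part of mark_compact.cc): checking that a memfd has
// no backing pages allocated, as the fstat() DCHECK at the end of FinishPhase()
// does. st_blocks counts 512-byte blocks actually allocated to the file, so a
// fully hole-punched (empty) memfd reports zero. Names are hypothetical;
// assumes <sys/stat.h> is available, as it is in this file.
static bool SketchMemfdIsEmpty(int memfd) {
  struct stat buf;
  if (fstat(memfd, &buf) != 0) {
    return false;
  }
  return buf.st_blocks == 0;
}
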
4880 } // namespace collector
4881 } // namespace gc
4882 } // namespace art
4883