/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "rosalloc.h"

#include "base/mutex-inl.h"
#include "gc/space/valgrind_settings.h"
#include "mem_map.h"
#include "mirror/class-inl.h"
#include "mirror/object.h"
#include "mirror/object-inl.h"
#include "thread-inl.h"
#include "thread_list.h"

#include <map>
#include <list>
#include <sstream>
#include <vector>

namespace art {
namespace gc {
namespace allocator {

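// Tuning for warming up the data pages of a freshly allocated run (see
// AllocRun()): either prefetch the slot area cache line by cache line, or
// zero it outright to take ownership of the cache lines.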
static constexpr bool kUsePrefetchDuringAllocRun = true;
static constexpr bool kPrefetchNewRunDataByZeroing = false;
static constexpr size_t kPrefetchStride = 64;

size_t RosAlloc::bracketSizes[kNumOfSizeBrackets];
size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
bool RosAlloc::initialized_ = false;
size_t RosAlloc::dedicated_full_run_storage_[kPageSize / sizeof(size_t)] = { 0 };
RosAlloc::Run* RosAlloc::dedicated_full_run_ =
    reinterpret_cast<RosAlloc::Run*>(dedicated_full_run_storage_);
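
// dedicated_full_run_ is a statically allocated run that is always treated as
// full. It serves as a sentinel for current_runs_ and the per-thread runs, so
// the allocation fast path can simply try AllocSlot() and fail without a null
// check when no real run is installed.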

RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
                   PageReleaseMode page_release_mode, bool running_on_valgrind,
                   size_t page_release_size_threshold)
    : base_(reinterpret_cast<uint8_t*>(base)), footprint_(capacity),
      capacity_(capacity), max_capacity_(max_capacity),
      lock_("rosalloc global lock", kRosAllocGlobalLock),
      bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock),
      page_release_mode_(page_release_mode),
      page_release_size_threshold_(page_release_size_threshold),
      running_on_valgrind_(running_on_valgrind) {
  DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
  DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
  CHECK_LE(capacity, max_capacity);
  CHECK(IsAligned<kPageSize>(page_release_size_threshold_));
  if (!initialized_) {
    Initialize();
  }
  VLOG(heap) << "RosAlloc base="
             << std::hex << (intptr_t)base_ << ", end="
             << std::hex << (intptr_t)(base_ + capacity_)
             << ", capacity=" << std::dec << capacity_
             << ", max_capacity=" << std::dec << max_capacity_;
  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
    size_bracket_lock_names_[i] =
        StringPrintf("an rosalloc size bracket %d lock", static_cast<int>(i));
    size_bracket_locks_[i] = new Mutex(size_bracket_lock_names_[i].c_str(), kRosAllocBracketLock);
    current_runs_[i] = dedicated_full_run_;
  }
  DCHECK_EQ(footprint_, capacity_);
  size_t num_of_pages = footprint_ / kPageSize;
  size_t max_num_of_pages = max_capacity_ / kPageSize;
  std::string error_msg;
  page_map_mem_map_.reset(MemMap::MapAnonymous("rosalloc page map", nullptr,
                                               RoundUp(max_num_of_pages, kPageSize),
                                               PROT_READ | PROT_WRITE, false, false, &error_msg));
  CHECK(page_map_mem_map_.get() != nullptr) << "Couldn't allocate the page map : " << error_msg;
  page_map_ = page_map_mem_map_->Begin();
  page_map_size_ = num_of_pages;
  max_page_map_size_ = max_num_of_pages;
  free_page_run_size_map_.resize(num_of_pages);
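  // Seed the allocator with a single free page run spanning the whole initial
  // footprint; AllocPages() carves page runs out of it on demand.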
  FreePageRun* free_pages = reinterpret_cast<FreePageRun*>(base_);
  if (kIsDebugBuild) {
    free_pages->magic_num_ = kMagicNumFree;
  }
  free_pages->SetByteSize(this, capacity_);
  DCHECK_EQ(capacity_ % kPageSize, static_cast<size_t>(0));
  DCHECK(free_pages->IsFree());
  free_pages->ReleasePages(this);
  DCHECK(free_pages->IsFree());
  free_page_runs_.insert(free_pages);
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::RosAlloc() : Inserted run 0x" << std::hex
              << reinterpret_cast<intptr_t>(free_pages)
              << " into free_page_runs_";
  }
}

RosAlloc::~RosAlloc() {
  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
    delete size_bracket_locks_[i];
  }
}

void* RosAlloc::AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type) {
  lock_.AssertHeld(self);
  DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject);
  FreePageRun* res = nullptr;
  const size_t req_byte_size = num_pages * kPageSize;
  // Find the lowest address free page run that's large enough.
  for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) {
    FreePageRun* fpr = *it;
    DCHECK(fpr->IsFree());
    size_t fpr_byte_size = fpr->ByteSize(this);
    DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
    if (req_byte_size <= fpr_byte_size) {
      // Found one.
      free_page_runs_.erase(it++);
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x"
                  << std::hex << reinterpret_cast<intptr_t>(fpr)
                  << " from free_page_runs_";
      }
      if (req_byte_size < fpr_byte_size) {
        // Split.
        FreePageRun* remainder =
            reinterpret_cast<FreePageRun*>(reinterpret_cast<uint8_t*>(fpr) + req_byte_size);
        if (kIsDebugBuild) {
          remainder->magic_num_ = kMagicNumFree;
        }
        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        // Don't need to call madvise on remainder here.
        free_page_runs_.insert(remainder);
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
                    << reinterpret_cast<intptr_t>(remainder)
                    << " into free_page_runs_";
        }
        fpr->SetByteSize(this, req_byte_size);
        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
      }
      res = fpr;
      break;
    } else {
      ++it;
    }
  }

  // Failed to allocate pages. Grow the footprint, if possible.
  if (UNLIKELY(res == nullptr && capacity_ > footprint_)) {
    FreePageRun* last_free_page_run = nullptr;
    size_t last_free_page_run_size;
    auto it = free_page_runs_.rbegin();
    if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
      // There is a free page run at the end.
      DCHECK(last_free_page_run->IsFree());
      DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
      last_free_page_run_size = last_free_page_run->ByteSize(this);
    } else {
      // There is no free page run at the end.
      last_free_page_run_size = 0;
    }
    DCHECK_LT(last_free_page_run_size, req_byte_size);
    if (capacity_ - footprint_ + last_free_page_run_size >= req_byte_size) {
      // If we grow the heap, we can allocate it.
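      // Grow by at least 2MB at a time to amortize the cost of
      // ArtRosAllocMoreCore(), but never beyond the remaining capacity.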
      size_t increment = std::min(std::max(2 * MB, req_byte_size - last_free_page_run_size),
                                  capacity_ - footprint_);
      DCHECK_EQ(increment % kPageSize, static_cast<size_t>(0));
      size_t new_footprint = footprint_ + increment;
      size_t new_num_of_pages = new_footprint / kPageSize;
      DCHECK_LT(page_map_size_, new_num_of_pages);
      DCHECK_LT(free_page_run_size_map_.size(), new_num_of_pages);
      page_map_size_ = new_num_of_pages;
      DCHECK_LE(page_map_size_, max_page_map_size_);
      free_page_run_size_map_.resize(new_num_of_pages);
      ArtRosAllocMoreCore(this, increment);
      if (last_free_page_run_size > 0) {
        // There was a free page run at the end. Expand its size.
        DCHECK_EQ(last_free_page_run_size, last_free_page_run->ByteSize(this));
        last_free_page_run->SetByteSize(this, last_free_page_run_size + increment);
        DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        DCHECK_EQ(last_free_page_run->End(this), base_ + new_footprint);
      } else {
        // Otherwise, insert a new free page run at the end.
        FreePageRun* new_free_page_run = reinterpret_cast<FreePageRun*>(base_ + footprint_);
        if (kIsDebugBuild) {
          new_free_page_run->magic_num_ = kMagicNumFree;
        }
        new_free_page_run->SetByteSize(this, increment);
        DCHECK_EQ(new_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        free_page_runs_.insert(new_free_page_run);
        DCHECK_EQ(*free_page_runs_.rbegin(), new_free_page_run);
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::AllocPages() : Grew the heap by inserting run 0x"
                    << std::hex << reinterpret_cast<intptr_t>(new_free_page_run)
                    << " into free_page_runs_";
        }
      }
      DCHECK_LE(footprint_ + increment, capacity_);
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::AllocPages() : increased the footprint from "
                  << footprint_ << " to " << new_footprint;
      }
      footprint_ = new_footprint;

      // And retry the last free page run.
      it = free_page_runs_.rbegin();
      DCHECK(it != free_page_runs_.rend());
      FreePageRun* fpr = *it;
      if (kIsDebugBuild && last_free_page_run_size > 0) {
        DCHECK(last_free_page_run != nullptr);
        DCHECK_EQ(last_free_page_run, fpr);
      }
      size_t fpr_byte_size = fpr->ByteSize(this);
      DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
      DCHECK_LE(req_byte_size, fpr_byte_size);
      free_page_runs_.erase(fpr);
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x" << std::hex
                  << reinterpret_cast<intptr_t>(fpr)
                  << " from free_page_runs_";
      }
      if (req_byte_size < fpr_byte_size) {
        // Split if there's a remainder.
        FreePageRun* remainder =
            reinterpret_cast<FreePageRun*>(reinterpret_cast<uint8_t*>(fpr) + req_byte_size);
        if (kIsDebugBuild) {
          remainder->magic_num_ = kMagicNumFree;
        }
        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        free_page_runs_.insert(remainder);
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
                    << reinterpret_cast<intptr_t>(remainder)
                    << " into free_page_runs_";
        }
        fpr->SetByteSize(this, req_byte_size);
        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
      }
      res = fpr;
    }
  }
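  // Record the allocation in the page map: the first page carries the run or
  // large-object tag and the remaining pages are tagged as "part" entries, so
  // FreePages() can later recover the extent from the pointer alone.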
  if (LIKELY(res != nullptr)) {
    // Update the page map.
    size_t page_map_idx = ToPageMapIndex(res);
    for (size_t i = 0; i < num_pages; i++) {
      DCHECK(IsFreePage(page_map_idx + i));
    }
    switch (page_map_type) {
      case kPageMapRun:
        page_map_[page_map_idx] = kPageMapRun;
        for (size_t i = 1; i < num_pages; i++) {
          page_map_[page_map_idx + i] = kPageMapRunPart;
        }
        break;
      case kPageMapLargeObject:
        page_map_[page_map_idx] = kPageMapLargeObject;
        for (size_t i = 1; i < num_pages; i++) {
          page_map_[page_map_idx + i] = kPageMapLargeObjectPart;
        }
        break;
      default:
        LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_type);
        break;
    }
    if (kIsDebugBuild) {
      // Clear the first page since it is not madvised due to the magic number.
      memset(res, 0, kPageSize);
    }
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocPages() : 0x" << std::hex << reinterpret_cast<intptr_t>(res)
                << "-0x" << (reinterpret_cast<intptr_t>(res) + num_pages * kPageSize)
                << "(" << std::dec << (num_pages * kPageSize) << ")";
    }
    return res;
  }

  // Fail.
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::AllocPages() : nullptr";
  }
  return nullptr;
}

size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
  lock_.AssertHeld(self);
  size_t pm_idx = ToPageMapIndex(ptr);
  DCHECK_LT(pm_idx, page_map_size_);
  uint8_t pm_type = page_map_[pm_idx];
  DCHECK(pm_type == kPageMapRun || pm_type == kPageMapLargeObject);
  uint8_t pm_part_type;
  switch (pm_type) {
    case kPageMapRun:
      pm_part_type = kPageMapRunPart;
      break;
    case kPageMapLargeObject:
      pm_part_type = kPageMapLargeObjectPart;
      break;
    default:
      LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << " : " << "pm_idx=" << pm_idx
                 << ", pm_type=" << static_cast<int>(pm_type) << ", ptr=" << std::hex
                 << reinterpret_cast<intptr_t>(ptr);
      return 0;
  }
  // Update the page map and count the number of pages.
  size_t num_pages = 1;
  page_map_[pm_idx] = kPageMapEmpty;
  size_t idx = pm_idx + 1;
  size_t end = page_map_size_;
  while (idx < end && page_map_[idx] == pm_part_type) {
    page_map_[idx] = kPageMapEmpty;
    num_pages++;
    idx++;
  }
  const size_t byte_size = num_pages * kPageSize;
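  // RosAlloc hands out zeroed memory, so unless every one of these pages is
  // about to be released back to the kernel (in which case they read back as
  // zero on the next touch), the contents must be cleared here.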
  if (already_zero) {
    if (ShouldCheckZeroMemory()) {
      const uintptr_t* word_ptr = reinterpret_cast<uintptr_t*>(ptr);
      for (size_t i = 0; i < byte_size / sizeof(uintptr_t); ++i) {
        CHECK_EQ(word_ptr[i], 0U) << "words don't match at index " << i;
      }
    }
  } else if (!DoesReleaseAllPages()) {
    memset(ptr, 0, byte_size);
  }

  if (kTraceRosAlloc) {
    LOG(INFO) << __PRETTY_FUNCTION__ << " : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
              << "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size)
              << "(" << std::dec << (num_pages * kPageSize) << ")";
  }

  // Turn it into a free run.
  FreePageRun* fpr = reinterpret_cast<FreePageRun*>(ptr);
  if (kIsDebugBuild) {
    fpr->magic_num_ = kMagicNumFree;
  }
  fpr->SetByteSize(this, byte_size);
  DCHECK(IsAligned<kPageSize>(fpr->ByteSize(this)));

  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
  if (!free_page_runs_.empty()) {
    // Try to coalesce in the higher address direction.
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
                << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx
                << "] -0x" << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " ["
                << std::dec
                << (fpr->End(this) == End() ? page_map_size_ : ToPageMapIndex(fpr->End(this)))
                << "]";
    }
    auto higher_it = free_page_runs_.upper_bound(fpr);
    if (higher_it != free_page_runs_.end()) {
      for (auto it = higher_it; it != free_page_runs_.end(); ) {
        FreePageRun* h = *it;
        DCHECK_EQ(h->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a higher free page run 0x"
                    << std::hex << reinterpret_cast<uintptr_t>(h) << " [" << std::dec
                    << ToPageMapIndex(h) << "] -0x" << std::hex
                    << reinterpret_cast<uintptr_t>(h->End(this)) << " [" << std::dec
                    << (h->End(this) == End() ? page_map_size_ : ToPageMapIndex(h->End(this)))
                    << "]";
        }
        if (fpr->End(this) == h->Begin()) {
          if (kTraceRosAlloc) {
            LOG(INFO) << "Success";
          }
          // Clear magic num since this is no longer the start of a free page run.
          if (kIsDebugBuild) {
            h->magic_num_ = 0;
          }
          free_page_runs_.erase(it++);
          if (kTraceRosAlloc) {
            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
                      << reinterpret_cast<intptr_t>(h)
                      << " from free_page_runs_";
          }
          fpr->SetByteSize(this, fpr->ByteSize(this) + h->ByteSize(this));
          DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        } else {
          // Not adjacent. Stop.
          if (kTraceRosAlloc) {
            LOG(INFO) << "Fail";
          }
          break;
        }
      }
    }
    // Try to coalesce in the lower address direction.
    auto lower_it = free_page_runs_.upper_bound(fpr);
    if (lower_it != free_page_runs_.begin()) {
      --lower_it;
      for (auto it = lower_it; ; ) {
        // We want to try to coalesce with the first element but
        // there's no "<=" operator for the iterator.
        bool to_exit_loop = it == free_page_runs_.begin();

        FreePageRun* l = *it;
        DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a lower free page run 0x"
                    << std::hex << reinterpret_cast<uintptr_t>(l) << " [" << std::dec
                    << ToPageMapIndex(l) << "] -0x" << std::hex
                    << reinterpret_cast<uintptr_t>(l->End(this)) << " [" << std::dec
                    << (l->End(this) == End() ? page_map_size_ : ToPageMapIndex(l->End(this)))
                    << "]";
        }
        if (l->End(this) == fpr->Begin()) {
          if (kTraceRosAlloc) {
            LOG(INFO) << "Success";
          }
          free_page_runs_.erase(it--);
          if (kTraceRosAlloc) {
            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
                      << reinterpret_cast<intptr_t>(l)
                      << " from free_page_runs_";
          }
          l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
          DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
          // Clear magic num since this is no longer the start of a free page run.
          if (kIsDebugBuild) {
            fpr->magic_num_ = 0;
          }
          fpr = l;
        } else {
          // Not adjacent. Stop.
          if (kTraceRosAlloc) {
            LOG(INFO) << "Fail";
          }
          break;
        }
        if (to_exit_loop) {
          break;
        }
      }
    }
  }

  // Insert it.
  DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
  DCHECK(fpr->IsFree());
  fpr->ReleasePages(this);
  DCHECK(fpr->IsFree());
  free_page_runs_.insert(fpr);
  DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex
              << reinterpret_cast<intptr_t>(fpr)
              << " into free_page_runs_";
  }
  return byte_size;
}

void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
                                 size_t* usable_size, size_t* bytes_tl_bulk_allocated) {
  DCHECK(bytes_allocated != nullptr);
  DCHECK(usable_size != nullptr);
  DCHECK_GT(size, kLargeSizeThreshold);
  size_t num_pages = RoundUp(size, kPageSize) / kPageSize;
  void* r;
  {
    MutexLock mu(self, lock_);
    r = AllocPages(self, num_pages, kPageMapLargeObject);
  }
  if (UNLIKELY(r == nullptr)) {
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocLargeObject() : nullptr";
    }
    return nullptr;
  }
  const size_t total_bytes = num_pages * kPageSize;
  *bytes_allocated = total_bytes;
  *usable_size = total_bytes;
  *bytes_tl_bulk_allocated = total_bytes;
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r)
              << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
              << "(" << std::dec << (num_pages * kPageSize) << ")";
  }
  // Check if the returned memory is really all zero.
  if (ShouldCheckZeroMemory()) {
    CHECK_EQ(total_bytes % sizeof(uintptr_t), 0U);
    const uintptr_t* words = reinterpret_cast<uintptr_t*>(r);
    for (size_t i = 0; i < total_bytes / sizeof(uintptr_t); ++i) {
      CHECK_EQ(words[i], 0U);
    }
  }
  return r;
}

size_t RosAlloc::FreeInternal(Thread* self, void* ptr) {
  DCHECK_LE(base_, ptr);
  DCHECK_LT(ptr, base_ + footprint_);
  size_t pm_idx = RoundDownToPageMapIndex(ptr);
  Run* run = nullptr;
  {
    MutexLock mu(self, lock_);
    DCHECK_LT(pm_idx, page_map_size_);
    uint8_t page_map_entry = page_map_[pm_idx];
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::FreeInternal() : " << std::hex << ptr << ", pm_idx=" << std::dec
                << pm_idx << ", page_map_entry=" << static_cast<int>(page_map_entry);
    }
    switch (page_map_[pm_idx]) {
      case kPageMapLargeObject:
        return FreePages(self, ptr, false);
      case kPageMapLargeObjectPart:
        LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_[pm_idx]);
        return 0;
      case kPageMapRunPart: {
        // Find the beginning of the run.
        do {
          --pm_idx;
          DCHECK_LT(pm_idx, capacity_ / kPageSize);
        } while (page_map_[pm_idx] != kPageMapRun);
        FALLTHROUGH_INTENDED;
      case kPageMapRun:
        run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
        DCHECK_EQ(run->magic_num_, kMagicNum);
        break;
      case kPageMapReleased:
      case kPageMapEmpty:
        LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_[pm_idx]);
        return 0;
      }
      default:
        LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_[pm_idx]);
        return 0;
    }
  }
  DCHECK(run != nullptr);
  return FreeFromRun(self, ptr, run);
}

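// Individual frees take bulk_free_lock_ for reading so that they can run
// concurrently with each other, while BulkFree() holds it for writing.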
size_t RosAlloc::Free(Thread* self, void* ptr) {
  ReaderMutexLock rmu(self, bulk_free_lock_);
  return FreeInternal(self, ptr);
}

RosAlloc::Run* RosAlloc::AllocRun(Thread* self, size_t idx) {
  RosAlloc::Run* new_run = nullptr;
  {
    MutexLock mu(self, lock_);
    new_run = reinterpret_cast<Run*>(AllocPages(self, numOfPages[idx], kPageMapRun));
  }
  if (LIKELY(new_run != nullptr)) {
    if (kIsDebugBuild) {
      new_run->magic_num_ = kMagicNum;
    }
    new_run->size_bracket_idx_ = idx;
    new_run->SetAllocBitMapBitsForInvalidSlots();
    DCHECK(!new_run->IsThreadLocal());
    DCHECK_EQ(new_run->first_search_vec_idx_, 0U);
    DCHECK(!new_run->to_be_bulk_freed_);
    if (kUsePrefetchDuringAllocRun && idx < kNumThreadLocalSizeBrackets) {
      // Take ownership of the cache lines if this is likely to become a thread-local run.
      if (kPrefetchNewRunDataByZeroing) {
        // Zeroing the data is sometimes faster than prefetching but it increases memory usage
        // since we end up dirtying zero pages which may have been madvised.
        new_run->ZeroData();
      } else {
        const size_t num_of_slots = numOfSlots[idx];
        const size_t bracket_size = bracketSizes[idx];
        const size_t num_of_bytes = num_of_slots * bracket_size;
        uint8_t* begin = reinterpret_cast<uint8_t*>(new_run) + headerSizes[idx];
        for (size_t i = 0; i < num_of_bytes; i += kPrefetchStride) {
          __builtin_prefetch(begin + i);
        }
      }
    }
  }
  return new_run;
}

RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
  // Get the lowest address non-full run from the binary tree.
  auto* const bt = &non_full_runs_[idx];
  if (!bt->empty()) {
    // If there's one, use it as the current run.
    auto it = bt->begin();
    Run* non_full_run = *it;
    DCHECK(non_full_run != nullptr);
    DCHECK(!non_full_run->IsThreadLocal());
    bt->erase(it);
    return non_full_run;
  }
  // If there's none, allocate a new run and use it as the current run.
  return AllocRun(self, idx);
}

inline void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
  Run* current_run = current_runs_[idx];
  DCHECK(current_run != nullptr);
  void* slot_addr = current_run->AllocSlot();
  if (UNLIKELY(slot_addr == nullptr)) {
    // The current run got full. Try to refill it.
    DCHECK(current_run->IsFull());
    if (kIsDebugBuild && current_run != dedicated_full_run_) {
      full_runs_[idx].insert(current_run);
      if (kTraceRosAlloc) {
        LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
                  << reinterpret_cast<intptr_t>(current_run)
                  << " into full_runs_[" << std::dec << idx << "]";
      }
      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
      DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
    }
    current_run = RefillRun(self, idx);
    if (UNLIKELY(current_run == nullptr)) {
      // Failed to allocate a new run, make sure that it is the dedicated full run.
      current_runs_[idx] = dedicated_full_run_;
      return nullptr;
    }
    DCHECK(current_run != nullptr);
    DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
    DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
    current_run->SetIsThreadLocal(false);
    current_runs_[idx] = current_run;
    DCHECK(!current_run->IsFull());
    slot_addr = current_run->AllocSlot();
    // Must succeed now with a new run.
    DCHECK(slot_addr != nullptr);
  }
  return slot_addr;
}

void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
                                         size_t* usable_size,
                                         size_t* bytes_tl_bulk_allocated) {
  DCHECK(bytes_allocated != nullptr);
  DCHECK(usable_size != nullptr);
  DCHECK(bytes_tl_bulk_allocated != nullptr);
  DCHECK_LE(size, kLargeSizeThreshold);
  size_t bracket_size;
  size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
  DCHECK_EQ(idx, SizeToIndex(size));
  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
  DCHECK_EQ(bracket_size, bracketSizes[idx]);
  DCHECK_LE(size, bracket_size);
  DCHECK(size > 512 || bracket_size - size < 16);
  Locks::mutator_lock_->AssertExclusiveHeld(self);
  void* slot_addr = AllocFromCurrentRunUnlocked(self, idx);
  if (LIKELY(slot_addr != nullptr)) {
    *bytes_allocated = bracket_size;
    *usable_size = bracket_size;
    *bytes_tl_bulk_allocated = bracket_size;
  }
  // Caller verifies that it is all 0.
  return slot_addr;
}

void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated,
                             size_t* usable_size, size_t* bytes_tl_bulk_allocated) {
  DCHECK(bytes_allocated != nullptr);
  DCHECK(usable_size != nullptr);
  DCHECK(bytes_tl_bulk_allocated != nullptr);
  DCHECK_LE(size, kLargeSizeThreshold);
  size_t bracket_size;
  size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
  DCHECK_EQ(idx, SizeToIndex(size));
  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
  DCHECK_EQ(bracket_size, bracketSizes[idx]);
  DCHECK_LE(size, bracket_size);
  DCHECK(size > 512 || bracket_size - size < 16);

  void* slot_addr;

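  // Small size brackets are served from per-thread runs with no locking on
  // the fast path; the remaining brackets share current_runs_[idx] under the
  // corresponding size bracket lock.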
  if (LIKELY(idx < kNumThreadLocalSizeBrackets)) {
    // Use a thread-local run.
    Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
    // Allow invalid since this will always fail the allocation.
    if (kIsDebugBuild) {
      // Need the lock to prevent race conditions.
      MutexLock mu(self, *size_bracket_locks_[idx]);
      CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
      CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
    }
    DCHECK(thread_local_run != nullptr);
    DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_);
    slot_addr = thread_local_run->AllocSlot();
    // The allocation must fail if the run is invalid.
    DCHECK(thread_local_run != dedicated_full_run_ || slot_addr == nullptr)
        << "allocated from an invalid run";
    if (UNLIKELY(slot_addr == nullptr)) {
      // The run got full. Try to free slots.
      DCHECK(thread_local_run->IsFull());
      MutexLock mu(self, *size_bracket_locks_[idx]);
      bool is_all_free_after_merge;
      // This is safe to do for the dedicated_full_run_ since the bitmaps are empty.
      if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
        DCHECK_NE(thread_local_run, dedicated_full_run_);
        // Some slot got freed. Keep it.
        DCHECK(!thread_local_run->IsFull());
        DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
        if (is_all_free_after_merge) {
          // Check that the bitmap idx is back at 0 if it's all free.
          DCHECK_EQ(thread_local_run->first_search_vec_idx_, 0U);
        }
      } else {
        // No slots got freed. Try to refill the thread-local run.
        DCHECK(thread_local_run->IsFull());
        if (thread_local_run != dedicated_full_run_) {
          thread_local_run->SetIsThreadLocal(false);
          if (kIsDebugBuild) {
            full_runs_[idx].insert(thread_local_run);
            if (kTraceRosAlloc) {
              LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
                        << reinterpret_cast<intptr_t>(thread_local_run)
                        << " into full_runs_[" << std::dec << idx << "]";
            }
          }
          DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
          DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
        }

        thread_local_run = RefillRun(self, idx);
        if (UNLIKELY(thread_local_run == nullptr)) {
          self->SetRosAllocRun(idx, dedicated_full_run_);
          return nullptr;
        }
        DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
        DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
        thread_local_run->SetIsThreadLocal(true);
        self->SetRosAllocRun(idx, thread_local_run);
        DCHECK(!thread_local_run->IsFull());
      }
      DCHECK(thread_local_run != nullptr);
      DCHECK(!thread_local_run->IsFull());
      DCHECK(thread_local_run->IsThreadLocal());
      // Account for all the free slots in the new or refreshed thread local run.
      *bytes_tl_bulk_allocated = thread_local_run->NumberOfFreeSlots() * bracket_size;
      slot_addr = thread_local_run->AllocSlot();
      // Must succeed now with a new run.
      DCHECK(slot_addr != nullptr);
    } else {
      // The slot is already counted. Leave it as is.
      *bytes_tl_bulk_allocated = 0;
    }
    DCHECK(slot_addr != nullptr);
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex
                << reinterpret_cast<intptr_t>(slot_addr)
                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
                << "(" << std::dec << (bracket_size) << ")";
    }
    *bytes_allocated = bracket_size;
    *usable_size = bracket_size;
  } else {
    // Use the (shared) current run.
    MutexLock mu(self, *size_bracket_locks_[idx]);
    slot_addr = AllocFromCurrentRunUnlocked(self, idx);
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex
                << reinterpret_cast<intptr_t>(slot_addr)
                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
                << "(" << std::dec << (bracket_size) << ")";
    }
    if (LIKELY(slot_addr != nullptr)) {
      *bytes_allocated = bracket_size;
      *usable_size = bracket_size;
      *bytes_tl_bulk_allocated = bracket_size;
    }
  }
  // Caller verifies that it is all 0.
  return slot_addr;
}

size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
  DCHECK_EQ(run->magic_num_, kMagicNum);
  DCHECK_LT(run, ptr);
  DCHECK_LT(ptr, run->End());
  const size_t idx = run->size_bracket_idx_;
  const size_t bracket_size = bracketSizes[idx];
  bool run_was_full = false;
  MutexLock brackets_mu(self, *size_bracket_locks_[idx]);
  if (kIsDebugBuild) {
    run_was_full = run->IsFull();
  }
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::FreeFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr);
  }
  if (LIKELY(run->IsThreadLocal())) {
    // It's a thread-local run. Just mark the thread-local free bit map and return.
    DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
    run->MarkThreadLocalFreeBitMap(ptr);
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::FreeFromRun() : Freed a slot in a thread local run 0x" << std::hex
                << reinterpret_cast<intptr_t>(run);
    }
    // A thread local run will be kept as a thread local even if it's become all free.
    return bracket_size;
  }
  // Free the slot in the run.
  run->FreeSlot(ptr);
  auto* non_full_runs = &non_full_runs_[idx];
  if (run->IsAllFree()) {
    // It has just become completely free. Free the pages of this run.
    std::set<Run*>::iterator pos = non_full_runs->find(run);
    if (pos != non_full_runs->end()) {
      non_full_runs->erase(pos);
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
                  << reinterpret_cast<intptr_t>(run) << " from non_full_runs_";
      }
    }
    if (run == current_runs_[idx]) {
      current_runs_[idx] = dedicated_full_run_;
    }
    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
    run->ZeroHeader();
    {
      MutexLock lock_mu(self, lock_);
      FreePages(self, run, true);
    }
  } else {
    // It is not completely free. If it wasn't the current run or
    // already in the non-full run set (i.e., it was full) insert it
    // into the non-full run set.
    if (run != current_runs_[idx]) {
      auto* full_runs = kIsDebugBuild ? &full_runs_[idx] : nullptr;
      auto pos = non_full_runs->find(run);
      if (pos == non_full_runs->end()) {
        DCHECK(run_was_full);
        DCHECK(full_runs->find(run) != full_runs->end());
        if (kIsDebugBuild) {
          full_runs->erase(run);
          if (kTraceRosAlloc) {
            LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
                      << reinterpret_cast<intptr_t>(run) << " from full_runs_";
          }
        }
        non_full_runs->insert(run);
        DCHECK(!run->IsFull());
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::FreeFromRun() : Inserted run 0x" << std::hex
                    << reinterpret_cast<intptr_t>(run)
                    << " into non_full_runs_[" << std::dec << idx << "]";
        }
      }
    }
  }
  return bracket_size;
}

std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) {
  std::string bit_map_str;
  for (size_t v = 0; v < num_vec; v++) {
    uint32_t vec = bit_map_base[v];
    if (v != num_vec - 1) {
      bit_map_str.append(StringPrintf("%x-", vec));
    } else {
      bit_map_str.append(StringPrintf("%x", vec));
    }
  }
  return bit_map_str;
}

std::string RosAlloc::Run::Dump() {
  size_t idx = size_bracket_idx_;
  size_t num_slots = numOfSlots[idx];
  size_t num_vec = RoundUp(num_slots, 32) / 32;
  std::ostringstream stream;
  stream << "RosAlloc Run = " << reinterpret_cast<void*>(this)
         << "{ magic_num=" << static_cast<int>(magic_num_)
         << " size_bracket_idx=" << idx
         << " is_thread_local=" << static_cast<int>(is_thread_local_)
         << " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_)
         << " first_search_vec_idx=" << first_search_vec_idx_
         << " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec)
         << " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec)
         << " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec)
         << " }" << std::endl;
  return stream.str();
}

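// A slot pointer maps back to its bit position as follows: slot_idx is the
// offset from the first slot divided by the bracket size, and the slot is
// tracked by bit (slot_idx % 32) of the 32-bit word alloc_bit_map_[slot_idx / 32].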
void RosAlloc::Run::FreeSlot(void* ptr) {
  DCHECK(!IsThreadLocal());
  const uint8_t idx = size_bracket_idx_;
  const size_t bracket_size = bracketSizes[idx];
  const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
      - (reinterpret_cast<uint8_t*>(this) + headerSizes[idx]);
  DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
  size_t slot_idx = offset_from_slot_base / bracket_size;
  DCHECK_LT(slot_idx, numOfSlots[idx]);
  size_t vec_idx = slot_idx / 32;
  if (kIsDebugBuild) {
    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
    DCHECK_LT(vec_idx, num_vec);
  }
  size_t vec_off = slot_idx % 32;
  uint32_t* vec = &alloc_bit_map_[vec_idx];
  first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(vec_idx));
  const uint32_t mask = 1U << vec_off;
  DCHECK_NE(*vec & mask, 0U);
  *vec &= ~mask;
  DCHECK_EQ(*vec & mask, 0U);
  // Zero out the memory.
  // TODO: Investigate alternate memset since ptr is guaranteed to be aligned to 16.
  memset(ptr, 0, bracket_size);
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
  }
}

size_t RosAlloc::Run::NumberOfFreeSlots() {
  size_t num_alloc_slots = 0;
  const size_t idx = size_bracket_idx_;
  const size_t num_slots = numOfSlots[idx];
  const size_t num_vec = RoundUp(num_slots, 32) / 32;
  DCHECK_NE(num_vec, 0U);
  for (size_t v = 0; v < num_vec - 1; v++) {
    num_alloc_slots += POPCOUNT(alloc_bit_map_[v]);
  }
  // Don't count the invalid bits in the last vector.
  uint32_t last_vec_masked = alloc_bit_map_[num_vec - 1] &
      ~GetBitmapLastVectorMask(num_slots, num_vec);
  num_alloc_slots += POPCOUNT(last_vec_masked);
  size_t num_free_slots = num_slots - num_alloc_slots;
  DCHECK_LE(num_alloc_slots, num_slots);
  DCHECK_LE(num_free_slots, num_slots);
  return num_free_slots;
}

inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
  DCHECK(IsThreadLocal());
  // Free slots in the alloc bit map based on the thread local free bit map.
  const size_t idx = size_bracket_idx_;
  const size_t num_of_slots = numOfSlots[idx];
  const size_t num_vec = RoundUp(num_of_slots, 32) / 32;
  bool changed = false;
  uint32_t* vecp = &alloc_bit_map_[0];
  uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0];
  bool is_all_free_after = true;
  for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
    uint32_t tl_free_vec = *tl_free_vecp;
    uint32_t vec_before = *vecp;
    uint32_t vec_after;
    if (tl_free_vec != 0) {
      first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
      vec_after = vec_before & ~tl_free_vec;
      *vecp = vec_after;
      changed = true;
      *tl_free_vecp = 0;  // clear the thread local free bit map.
    } else {
      vec_after = vec_before;
    }
    if (vec_after != 0) {
      if (v == num_vec - 1) {
        // Only not all free if a bit other than the mask bits is set.
        is_all_free_after =
            is_all_free_after && GetBitmapLastVectorMask(num_of_slots, num_vec) == vec_after;
      } else {
        is_all_free_after = false;
      }
    }
    DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
  }
  *is_all_free_after_out = is_all_free_after;
  // Return true if there was at least a bit set in the thread-local
  // free bit map and at least a bit in the alloc bit map changed.
  return changed;
}

inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
  DCHECK(!IsThreadLocal());
  // Free slots in the alloc bit map based on the bulk free bit map.
  const size_t num_vec = NumberOfBitmapVectors();
  uint32_t* vecp = &alloc_bit_map_[0];
  uint32_t* free_vecp = &BulkFreeBitMap()[0];
  for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
    uint32_t free_vec = *free_vecp;
    if (free_vec != 0) {
      first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
      *vecp &= ~free_vec;
      *free_vecp = 0;  // clear the bulk free bit map.
    }
    DCHECK_EQ(*free_vecp, static_cast<uint32_t>(0));
  }
}

inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
  DCHECK(IsThreadLocal());
  // Union the thread local bit map with the bulk free bit map.
  size_t num_vec = NumberOfBitmapVectors();
  uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0];
  uint32_t* from_vecp = &BulkFreeBitMap()[0];
  for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
    uint32_t from_vec = *from_vecp;
    if (from_vec != 0) {
      *to_vecp |= from_vec;
      *from_vecp = 0;  // clear the bulk free bit map.
    }
    DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
  }
}

inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
  DCHECK(IsThreadLocal());
  MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap");
}

inline size_t RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
  return MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap");
}

inline size_t RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
                                                  const char* caller_name) {
  const uint8_t idx = size_bracket_idx_;
  const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
      - (reinterpret_cast<uint8_t*>(this) + headerSizes[idx]);
  const size_t bracket_size = bracketSizes[idx];
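  // Zero the slot contents at mark time so that the run already satisfies
  // RosAlloc's all-zero-memory invariant by the time its pages are freed.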
  memset(ptr, 0, bracket_size);
  DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
  size_t slot_idx = offset_from_slot_base / bracket_size;
  DCHECK_LT(slot_idx, numOfSlots[idx]);
  size_t vec_idx = slot_idx / 32;
  if (kIsDebugBuild) {
    size_t num_vec = NumberOfBitmapVectors();
    DCHECK_LT(vec_idx, num_vec);
  }
  size_t vec_off = slot_idx % 32;
  uint32_t* vec = &free_bit_map_base[vec_idx];
  const uint32_t mask = 1U << vec_off;
  DCHECK_EQ(*vec & mask, 0U);
  *vec |= mask;
  DCHECK_NE(*vec & mask, 0U);
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex
              << reinterpret_cast<intptr_t>(ptr)
              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
  }
  return bracket_size;
}

inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) {
  const size_t kBitsPerVec = 32;
  DCHECK_GE(num_vec * kBitsPerVec, num_slots);
  DCHECK_NE(num_vec, 0U);
  size_t remain = num_vec * kBitsPerVec - num_slots;
  DCHECK_LT(remain, kBitsPerVec);
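  // E.g. with num_slots == 40 and num_vec == 2, remain == 24 and the mask is
  // 0xFFFFFF00: the top 24 bits of the last vector, which correspond to the
  // slots that do not exist.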
  return ((1U << remain) - 1) << ((kBitsPerVec - remain) & 0x1F);
}

inline bool RosAlloc::Run::IsAllFree() {
  const uint8_t idx = size_bracket_idx_;
  const size_t num_slots = numOfSlots[idx];
  const size_t num_vec = NumberOfBitmapVectors();
  DCHECK_NE(num_vec, 0U);
  // Check the last vector after the loop since it uses a special case for the masked bits.
  for (size_t v = 0; v < num_vec - 1; v++) {
    uint32_t vec = alloc_bit_map_[v];
    if (vec != 0) {
      return false;
    }
  }
  // Make sure the last word is equal to the mask, all other bits must be 0.
  return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec);
}

inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
  const size_t num_vec = NumberOfBitmapVectors();
  for (size_t v = 0; v < num_vec; v++) {
    uint32_t vec = BulkFreeBitMap()[v];
    if (vec != 0) {
      return false;
    }
  }
  return true;
}

inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() {
  const size_t num_vec = NumberOfBitmapVectors();
  for (size_t v = 0; v < num_vec; v++) {
    uint32_t vec = ThreadLocalFreeBitMap()[v];
    if (vec != 0) {
      return false;
    }
  }
  return true;
}

inline void RosAlloc::Run::SetAllocBitMapBitsForInvalidSlots() {
  const size_t idx = size_bracket_idx_;
  const size_t num_slots = numOfSlots[idx];
  const size_t num_vec = RoundUp(num_slots, 32) / 32;
  DCHECK_NE(num_vec, 0U);
  // Make sure to set the bits at the end of the bitmap so that we don't allocate there since they
  // don't represent valid slots.
  alloc_bit_map_[num_vec - 1] |= GetBitmapLastVectorMask(num_slots, num_vec);
}

inline void RosAlloc::Run::ZeroHeader() {
  const uint8_t idx = size_bracket_idx_;
  memset(this, 0, headerSizes[idx]);
}

inline void RosAlloc::Run::ZeroData() {
  const uint8_t idx = size_bracket_idx_;
  uint8_t* slot_begin = reinterpret_cast<uint8_t*>(this) + headerSizes[idx];
  memset(slot_begin, 0, numOfSlots[idx] * bracketSizes[idx]);
}

inline void RosAlloc::Run::FillAllocBitMap() {
  size_t num_vec = NumberOfBitmapVectors();
  memset(alloc_bit_map_, 0xFF, sizeof(uint32_t) * num_vec);
  first_search_vec_idx_ = num_vec - 1;  // No free bits in any of the bitmap words.
}

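// Walks every slot in the run, invoking the handler with
// used_bytes == bracket_size for allocated slots and used_bytes == 0 for free
// ones; slots freed thread-locally are reported as free.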
InspectAllSlots(void (* handler)(void * start,void * end,size_t used_bytes,void * callback_arg),void * arg)1117 void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
1118 void* arg) {
1119 size_t idx = size_bracket_idx_;
1120 uint8_t* slot_base = reinterpret_cast<uint8_t*>(this) + headerSizes[idx];
1121 size_t num_slots = numOfSlots[idx];
1122 size_t bracket_size = IndexToBracketSize(idx);
1123 DCHECK_EQ(slot_base + num_slots * bracket_size,
1124 reinterpret_cast<uint8_t*>(this) + numOfPages[idx] * kPageSize);
1125 size_t num_vec = RoundUp(num_slots, 32) / 32;
1126 size_t slots = 0;
1127 const uint32_t* const tl_free_vecp = IsThreadLocal() ? ThreadLocalFreeBitMap() : nullptr;
1128 for (size_t v = 0; v < num_vec; v++, slots += 32) {
1129 DCHECK_GE(num_slots, slots);
1130 uint32_t vec = alloc_bit_map_[v];
1131 if (tl_free_vecp != nullptr) {
1132 // Clear out the set bits in the thread local free bitmap since these aren't actually
1133 // allocated.
1134 vec &= ~tl_free_vecp[v];
1135 }
1136 size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
1137 for (size_t i = 0; i < end; ++i) {
1138 bool is_allocated = ((vec >> i) & 0x1) != 0;
1139 uint8_t* slot_addr = slot_base + (slots + i) * bracket_size;
1140 if (is_allocated) {
1141 handler(slot_addr, slot_addr + bracket_size, bracket_size, arg);
1142 } else {
1143 handler(slot_addr, slot_addr + bracket_size, 0, arg);
1144 }
1145 }
1146 }
1147 }
1148
1149 // If true, read the page map entries in BulkFree() without using the
1150 // lock for better performance, assuming that the existence of an
1151 // allocated chunk/pointer being freed in BulkFree() guarantees that
1152 // the page map entry won't change. Disabled for now.
1153 static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = true;
1154
BulkFree(Thread * self,void ** ptrs,size_t num_ptrs)1155 size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
1156 size_t freed_bytes = 0;
1157 if ((false)) {
1158 // Used only to test Free() as GC uses only BulkFree().
1159 for (size_t i = 0; i < num_ptrs; ++i) {
1160 freed_bytes += FreeInternal(self, ptrs[i]);
1161 }
1162 return freed_bytes;
1163 }
1164
1165 WriterMutexLock wmu(self, bulk_free_lock_);
1166
1167 // First mark slots to free in the bulk free bit map without locking the
1168 // size bracket locks. On host, unordered_set is faster than vector + flag.
1169 #ifdef HAVE_ANDROID_OS
1170 std::vector<Run*> runs;
1171 #else
1172 std::unordered_set<Run*, hash_run, eq_run> runs;
1173 #endif
1174 for (size_t i = 0; i < num_ptrs; i++) {
1175 void* ptr = ptrs[i];
1176 DCHECK_LE(base_, ptr);
1177 DCHECK_LT(ptr, base_ + footprint_);
1178 size_t pm_idx = RoundDownToPageMapIndex(ptr);
1179 Run* run = nullptr;
1180 if (kReadPageMapEntryWithoutLockInBulkFree) {
1181 // Read the page map entries without locking the lock.
1182 uint8_t page_map_entry = page_map_[pm_idx];
1183 if (kTraceRosAlloc) {
1184 LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
1185 << std::dec << pm_idx
1186 << ", page_map_entry=" << static_cast<int>(page_map_entry);
1187 }
1188 if (LIKELY(page_map_entry == kPageMapRun)) {
1189 run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
1190 } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
1191 size_t pi = pm_idx;
1192 // Find the beginning of the run.
1193 do {
1194 --pi;
1195 DCHECK_LT(pi, capacity_ / kPageSize);
1196 } while (page_map_[pi] != kPageMapRun);
1197 run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
1198 } else if (page_map_entry == kPageMapLargeObject) {
1199 MutexLock mu(self, lock_);
1200 freed_bytes += FreePages(self, ptr, false);
1201 continue;
1202 } else {
1203 LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_entry);
1204 }
1205 } else {
1206 // Read the page map entries with a lock.
1207 MutexLock mu(self, lock_);
1208 DCHECK_LT(pm_idx, page_map_size_);
1209 uint8_t page_map_entry = page_map_[pm_idx];
1210 if (kTraceRosAlloc) {
1211 LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
1212 << std::dec << pm_idx
1213 << ", page_map_entry=" << static_cast<int>(page_map_entry);
1214 }
1215 if (LIKELY(page_map_entry == kPageMapRun)) {
1216 run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
1217 } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
1218 size_t pi = pm_idx;
1219 // Find the beginning of the run.
1220 do {
1221 --pi;
1222 DCHECK_LT(pi, capacity_ / kPageSize);
1223 } while (page_map_[pi] != kPageMapRun);
1224 run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
1225 } else if (page_map_entry == kPageMapLargeObject) {
1226 freed_bytes += FreePages(self, ptr, false);
1227 continue;
1228 } else {
1229 LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_entry);
1230 }
1231 }
1232 DCHECK(run != nullptr);
1233 DCHECK_EQ(run->magic_num_, kMagicNum);
1234 // Set the bit in the bulk free bit map.
1235 freed_bytes += run->MarkBulkFreeBitMap(ptr);
1236 #ifdef HAVE_ANDROID_OS
1237 if (!run->to_be_bulk_freed_) {
1238 run->to_be_bulk_freed_ = true;
1239 runs.push_back(run);
1240 }
1241 #else
1242 runs.insert(run);
1243 #endif
1244 }
1245
1246 // Now, iterate over the affected runs and update the alloc bit map
1247 // based on the bulk free bit map (for non-thread-local runs) and
1248 // union the bulk free bit map into the thread-local free bit map
1249 // (for thread-local runs.)
1250 for (Run* run : runs) {
1251 #ifdef HAVE_ANDROID_OS
1252 DCHECK(run->to_be_bulk_freed_);
1253 run->to_be_bulk_freed_ = false;
1254 #endif
1255 size_t idx = run->size_bracket_idx_;
1256 MutexLock brackets_mu(self, *size_bracket_locks_[idx]);
1257 if (run->IsThreadLocal()) {
1258 DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
1259 DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
1260 DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
1261 run->UnionBulkFreeBitMapToThreadLocalFreeBitMap();
1262 if (kTraceRosAlloc) {
1263 LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x"
1264 << std::hex << reinterpret_cast<intptr_t>(run);
1265 }
1266 DCHECK(run->IsThreadLocal());
1267 // A thread local run will be kept as a thread local even if
1268 // it's become all free.
1269 } else {
1270 bool run_was_full = run->IsFull();
1271 run->MergeBulkFreeBitMapIntoAllocBitMap();
1272 if (kTraceRosAlloc) {
1273 LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a run 0x" << std::hex
1274 << reinterpret_cast<intptr_t>(run);
1275 }
1276 // Check if the run should be moved to non_full_runs_ or
1277 // free_page_runs_.
1278 auto* non_full_runs = &non_full_runs_[idx];
1279 auto* full_runs = kIsDebugBuild ? &full_runs_[idx] : nullptr;
1280 if (run->IsAllFree()) {
1281 // It has just become completely free. Free the pages of the
1282 // run.
1283 bool run_was_current = run == current_runs_[idx];
1284 if (run_was_current) {
1285 DCHECK(full_runs->find(run) == full_runs->end());
1286 DCHECK(non_full_runs->find(run) == non_full_runs->end());
1287 // If it was a current run, reuse it.
1288 } else if (run_was_full) {
1289 // If it was full, remove it from the full run set (debug
1290 // only.)
1291 if (kIsDebugBuild) {
1292 std::unordered_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
1293 DCHECK(pos != full_runs->end());
1294 full_runs->erase(pos);
1295 if (kTraceRosAlloc) {
1296 LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1297 << reinterpret_cast<intptr_t>(run)
1298 << " from full_runs_";
1299 }
1300 DCHECK(full_runs->find(run) == full_runs->end());
1301 }
1302 } else {
1303 // If it was in a non full run set, remove it from the set.
1304 DCHECK(full_runs->find(run) == full_runs->end());
1305 DCHECK(non_full_runs->find(run) != non_full_runs->end());
1306 non_full_runs->erase(run);
1307 if (kTraceRosAlloc) {
1308 LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1309 << reinterpret_cast<intptr_t>(run)
1310 << " from non_full_runs_";
1311 }
1312 DCHECK(non_full_runs->find(run) == non_full_runs->end());
1313 }
1314 if (!run_was_current) {
1315 run->ZeroHeader();
1316 MutexLock lock_mu(self, lock_);
1317 FreePages(self, run, true);
1318 }
1319 } else {
1320 // It is not completely free. If it wasn't the current run or
1321 // already in the non-full run set (i.e., it was full) insert
1322 // it into the non-full run set.
1323 if (run == current_runs_[idx]) {
1324 DCHECK(non_full_runs->find(run) == non_full_runs->end());
1325 DCHECK(full_runs->find(run) == full_runs->end());
1326 // If it was a current run, keep it.
1327 } else if (run_was_full) {
1328 // If it was full, remove it from the full run set (debug
1329 // only) and insert into the non-full run set.
1330 DCHECK(full_runs->find(run) != full_runs->end());
1331 DCHECK(non_full_runs->find(run) == non_full_runs->end());
1332 if (kIsDebugBuild) {
1333 full_runs->erase(run);
1334 if (kTraceRosAlloc) {
1335 LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1336 << reinterpret_cast<intptr_t>(run)
1337 << " from full_runs_";
1338 }
1339 }
1340 non_full_runs->insert(run);
1341 if (kTraceRosAlloc) {
1342 LOG(INFO) << "RosAlloc::BulkFree() : Inserted run 0x" << std::hex
1343 << reinterpret_cast<intptr_t>(run)
1344 << " into non_full_runs_[" << std::dec << idx;
1345 }
1346 } else {
1347 // If it was not full, so leave it in the non full run set.
1348 DCHECK(full_runs->find(run) == full_runs->end());
1349 DCHECK(non_full_runs->find(run) != non_full_runs->end());
1350 }
1351 }
1352 }
1353 }
1354 return freed_bytes;
1355 }
1356
std::string RosAlloc::DumpPageMap() {
  std::ostringstream stream;
  stream << "RosAlloc PageMap: " << std::endl;
  lock_.AssertHeld(Thread::Current());
  size_t end = page_map_size_;
  FreePageRun* curr_fpr = nullptr;
  size_t curr_fpr_size = 0;
  size_t remaining_curr_fpr_size = 0;
  size_t num_running_empty_pages = 0;
  for (size_t i = 0; i < end; ++i) {
    uint8_t pm = page_map_[i];
    switch (pm) {
      case kPageMapReleased:
        // Fall-through.
      case kPageMapEmpty: {
        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
        if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
          // Encountered a fresh free page run.
          DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
          DCHECK(fpr->IsFree());
          DCHECK(curr_fpr == nullptr);
          DCHECK_EQ(curr_fpr_size, static_cast<size_t>(0));
          curr_fpr = fpr;
          curr_fpr_size = fpr->ByteSize(this);
          DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
          remaining_curr_fpr_size = curr_fpr_size - kPageSize;
          stream << "[" << i << "]=" << (pm == kPageMapReleased ? "Released" : "Empty")
                 << " (FPR start) fpr_size=" << curr_fpr_size
                 << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
          if (remaining_curr_fpr_size == 0) {
            // Reset at the end of the current free page run.
            curr_fpr = nullptr;
            curr_fpr_size = 0;
          }
          stream << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr) << std::endl;
          DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
        } else {
          // Still part of the current free page run.
          DCHECK_NE(num_running_empty_pages, static_cast<size_t>(0));
          DCHECK(curr_fpr != nullptr && curr_fpr_size > 0 && remaining_curr_fpr_size > 0);
          DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
          DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
          remaining_curr_fpr_size -= kPageSize;
          stream << "[" << i << "]=Empty (FPR part)"
                 << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
          if (remaining_curr_fpr_size == 0) {
            // Reset at the end of the current free page run.
            curr_fpr = nullptr;
            curr_fpr_size = 0;
          }
        }
        num_running_empty_pages++;
        break;
      }
      case kPageMapLargeObject: {
        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
        num_running_empty_pages = 0;
        stream << "[" << i << "]=Large (start)" << std::endl;
        break;
      }
      case kPageMapLargeObjectPart:
        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
        num_running_empty_pages = 0;
        stream << "[" << i << "]=Large (part)" << std::endl;
        break;
      case kPageMapRun: {
        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
        num_running_empty_pages = 0;
        Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
        size_t idx = run->size_bracket_idx_;
        stream << "[" << i << "]=Run (start)"
               << " idx=" << idx
               << " numOfPages=" << numOfPages[idx]
               << " is_thread_local=" << run->is_thread_local_
               << " is_all_free=" << (run->IsAllFree() ? 1 : 0)
               << std::endl;
        break;
      }
      case kPageMapRunPart:
        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
        num_running_empty_pages = 0;
        stream << "[" << i << "]=Run (part)" << std::endl;
        break;
      default:
        stream << "[" << i << "]=Unrecognizable page map type: " << pm;
        break;
    }
  }
  return stream.str();
}

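// Returns the usable byte count of the allocation containing ptr: the
// bracket size for a run slot (e.g. a 100-byte request served from the
// 112-byte bracket reports 112 usable bytes), or the page-rounded size
// for a large object.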
size_t RosAlloc::UsableSize(const void* ptr) {
  DCHECK_LE(base_, ptr);
  DCHECK_LT(ptr, base_ + footprint_);
  size_t pm_idx = RoundDownToPageMapIndex(ptr);
  MutexLock mu(Thread::Current(), lock_);
  switch (page_map_[pm_idx]) {
    case kPageMapReleased:
      // Fall-through.
    case kPageMapEmpty:
      LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
                 << std::hex << reinterpret_cast<intptr_t>(ptr);
      break;
    case kPageMapLargeObject: {
      size_t num_pages = 1;
      size_t idx = pm_idx + 1;
      size_t end = page_map_size_;
      while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
        num_pages++;
        idx++;
      }
      return num_pages * kPageSize;
    }
    case kPageMapLargeObjectPart:
      LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
                 << std::hex << reinterpret_cast<intptr_t>(ptr);
      break;
    case kPageMapRun:
    case kPageMapRunPart: {
      // Find the beginning of the run.
      while (page_map_[pm_idx] != kPageMapRun) {
        pm_idx--;
        DCHECK_LT(pm_idx, capacity_ / kPageSize);
      }
      DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
      Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
      DCHECK_EQ(run->magic_num_, kMagicNum);
      size_t idx = run->size_bracket_idx_;
      size_t offset_from_slot_base = reinterpret_cast<const uint8_t*>(ptr)
          - (reinterpret_cast<uint8_t*>(run) + headerSizes[idx]);
      DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
      return IndexToBracketSize(idx);
    }
    default: {
      LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_[pm_idx]);
      break;
    }
  }
  return 0;
}

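// Gives memory back to the OS by trimming the free page run at the high
// end of the footprint, if there is one; interior free page runs are
// handled by ReleasePages() instead. Returns true if the footprint was
// reduced.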
bool RosAlloc::Trim() {
  MutexLock mu(Thread::Current(), lock_);
  FreePageRun* last_free_page_run;
  DCHECK_EQ(footprint_ % kPageSize, static_cast<size_t>(0));
  auto it = free_page_runs_.rbegin();
  if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
    // Remove the last free page run, if any.
    DCHECK(last_free_page_run->IsFree());
    DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
    DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
    DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
    free_page_runs_.erase(last_free_page_run);
    size_t decrement = last_free_page_run->ByteSize(this);
    size_t new_footprint = footprint_ - decrement;
    DCHECK_EQ(new_footprint % kPageSize, static_cast<size_t>(0));
    size_t new_num_of_pages = new_footprint / kPageSize;
    DCHECK_GE(page_map_size_, new_num_of_pages);
    // Zero out the tail of the page map.
    uint8_t* zero_begin = const_cast<uint8_t*>(page_map_) + new_num_of_pages;
    uint8_t* madvise_begin = AlignUp(zero_begin, kPageSize);
    DCHECK_LE(madvise_begin, page_map_mem_map_->End());
    size_t madvise_size = page_map_mem_map_->End() - madvise_begin;
    if (madvise_size > 0) {
      DCHECK_ALIGNED(madvise_begin, kPageSize);
      DCHECK_EQ(RoundUp(madvise_size, kPageSize), madvise_size);
      if (!kMadviseZeroes) {
        memset(madvise_begin, 0, madvise_size);
      }
      CHECK_EQ(madvise(madvise_begin, madvise_size, MADV_DONTNEED), 0);
    }
    if (madvise_begin - zero_begin > 0) {
      memset(zero_begin, 0, madvise_begin - zero_begin);
    }
    page_map_size_ = new_num_of_pages;
    free_page_run_size_map_.resize(new_num_of_pages);
    DCHECK_EQ(free_page_run_size_map_.size(), new_num_of_pages);
    ArtRosAllocMoreCore(this, -(static_cast<intptr_t>(decrement)));
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::Trim() : decreased the footprint from "
                << footprint_ << " to " << new_footprint;
    }
    DCHECK_LT(new_footprint, footprint_);
    DCHECK_LT(new_footprint, capacity_);
    footprint_ = new_footprint;
    return true;
  }
  return false;
}

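// Visits every region of the heap and reports it to the handler as
// (start, end, used_bytes, arg): free page runs are reported with
// used_bytes == 0, large objects with their page-rounded size, and runs
// delegate to Run::InspectAllSlots() for per-slot accounting. A minimal
// caller (a sketch; `total` is a hypothetical local) could be:
//
//   size_t total = 0;
//   rosalloc->InspectAll(RosAlloc::BytesAllocatedCallback, &total);
//
// BytesAllocatedCallback(), defined below, simply sums used_bytes.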
void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                          void* arg) {
  // Note: no need to use this to release pages as we already do so in FreePages().
  if (handler == nullptr) {
    return;
  }
  MutexLock mu(Thread::Current(), lock_);
  size_t pm_end = page_map_size_;
  size_t i = 0;
  while (i < pm_end) {
    uint8_t pm = page_map_[i];
    switch (pm) {
      case kPageMapReleased:
        // Fall-through.
      case kPageMapEmpty: {
        // The start of a free page run.
        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
        DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
        size_t fpr_size = fpr->ByteSize(this);
        DCHECK(IsAligned<kPageSize>(fpr_size));
        void* start = fpr;
        if (kIsDebugBuild) {
          // In the debug build, the first page of a free page run
          // contains a magic number for debugging. Exclude it.
          start = reinterpret_cast<uint8_t*>(fpr) + kPageSize;
        }
        void* end = reinterpret_cast<uint8_t*>(fpr) + fpr_size;
        handler(start, end, 0, arg);
        size_t num_pages = fpr_size / kPageSize;
        if (kIsDebugBuild) {
          for (size_t j = i + 1; j < i + num_pages; ++j) {
            DCHECK(IsFreePage(j));
          }
        }
        i += fpr_size / kPageSize;
        DCHECK_LE(i, pm_end);
        break;
      }
      case kPageMapLargeObject: {
        // The start of a large object.
        size_t num_pages = 1;
        size_t idx = i + 1;
        while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
          num_pages++;
          idx++;
        }
        void* start = base_ + i * kPageSize;
        void* end = base_ + (i + num_pages) * kPageSize;
        size_t used_bytes = num_pages * kPageSize;
        handler(start, end, used_bytes, arg);
        if (kIsDebugBuild) {
          for (size_t j = i + 1; j < i + num_pages; ++j) {
            DCHECK_EQ(page_map_[j], kPageMapLargeObjectPart);
          }
        }
        i += num_pages;
        DCHECK_LE(i, pm_end);
        break;
      }
      case kPageMapLargeObjectPart:
        LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
        break;
      case kPageMapRun: {
        // The start of a run.
        Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
        DCHECK_EQ(run->magic_num_, kMagicNum);
        // The dedicated full run doesn't contain any real allocations;
        // don't visit the slots in there.
        run->InspectAllSlots(handler, arg);
        size_t num_pages = numOfPages[run->size_bracket_idx_];
        if (kIsDebugBuild) {
          for (size_t j = i + 1; j < i + num_pages; ++j) {
            DCHECK_EQ(page_map_[j], kPageMapRunPart);
          }
        }
        i += num_pages;
        DCHECK_LE(i, pm_end);
        break;
      }
      case kPageMapRunPart:
        LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
        break;
      default:
        LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
        break;
    }
  }
}

size_t RosAlloc::Footprint() {
  MutexLock mu(Thread::Current(), lock_);
  return footprint_;
}

size_t RosAlloc::FootprintLimit() {
  MutexLock mu(Thread::Current(), lock_);
  return capacity_;
}

void RosAlloc::SetFootprintLimit(size_t new_capacity) {
  MutexLock mu(Thread::Current(), lock_);
  DCHECK_EQ(RoundUp(new_capacity, kPageSize), new_capacity);
  // Only growing is supported here; shrinking is done via Trim().
  if (capacity_ < new_capacity) {
    CHECK_LE(new_capacity, max_capacity_);
    capacity_ = new_capacity;
    VLOG(heap) << "new capacity=" << capacity_;
  }
}

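// Detaches all of the given thread's thread-local runs and returns the
// number of bytes still free in them. Each revoked run is replaced with
// dedicated_full_run_, so the thread's next allocation in that bracket
// falls off the thread-local fast path and picks up a fresh run.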
size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
  Thread* self = Thread::Current();
  // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
  ReaderMutexLock wmu(self, bulk_free_lock_);
  size_t free_bytes = 0U;
  for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
    MutexLock mu(self, *size_bracket_locks_[idx]);
    Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
    CHECK(thread_local_run != nullptr);
    // Invalid means already revoked.
    DCHECK(thread_local_run->IsThreadLocal());
    if (thread_local_run != dedicated_full_run_) {
      // Note the thread local run may not be full here.
      thread->SetRosAllocRun(idx, dedicated_full_run_);
      DCHECK_EQ(thread_local_run->magic_num_, kMagicNum);
      // Count the number of free slots left.
      size_t num_free_slots = thread_local_run->NumberOfFreeSlots();
      free_bytes += num_free_slots * bracketSizes[idx];
      bool dont_care;
      thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
      thread_local_run->SetIsThreadLocal(false);
      thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
      DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
      DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
      RevokeRun(self, idx, thread_local_run);
    }
  }
  return free_bytes;
}

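// Files a revoked run back into the right bookkeeping structure based on
// its occupancy: full runs go into full_runs_ (tracked in debug builds
// only), completely free runs are handed back to the page allocator, and
// partially full runs land in non_full_runs_ for reuse.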
void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) {
  size_bracket_locks_[idx]->AssertHeld(self);
  DCHECK(run != dedicated_full_run_);
  if (run->IsFull()) {
    if (kIsDebugBuild) {
      full_runs_[idx].insert(run);
      DCHECK(full_runs_[idx].find(run) != full_runs_[idx].end());
      if (kTraceRosAlloc) {
        LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
                  << reinterpret_cast<intptr_t>(run)
                  << " into full_runs_[" << std::dec << idx << "]";
      }
    }
  } else if (run->IsAllFree()) {
    run->ZeroHeader();
    MutexLock mu(self, lock_);
    FreePages(self, run, true);
  } else {
    non_full_runs_[idx].insert(run);
    DCHECK(non_full_runs_[idx].find(run) != non_full_runs_[idx].end());
    if (kTraceRosAlloc) {
      LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
                << reinterpret_cast<intptr_t>(run)
                << " into non_full_runs_[" << std::dec << idx << "]";
    }
  }
}

void RosAlloc::RevokeThreadUnsafeCurrentRuns() {
  // Revoke the current runs which share the same idx as thread local runs.
  Thread* self = Thread::Current();
  for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
    MutexLock mu(self, *size_bracket_locks_[idx]);
    if (current_runs_[idx] != dedicated_full_run_) {
      RevokeRun(self, idx, current_runs_[idx]);
      current_runs_[idx] = dedicated_full_run_;
    }
  }
}

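// Revokes the thread-local runs of every live thread and then the shared
// current runs, returning the total number of free bytes recovered.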
size_t RosAlloc::RevokeAllThreadLocalRuns() {
  // This is called when a mutator thread won't allocate, such as at
  // the time of Zygote creation or during a GC pause.
  MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
  MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
  std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
  size_t free_bytes = 0U;
  for (Thread* thread : thread_list) {
    free_bytes += RevokeThreadLocalRuns(thread);
  }
  RevokeThreadUnsafeCurrentRuns();
  return free_bytes;
}

void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) {
  if (kIsDebugBuild) {
    Thread* self = Thread::Current();
    // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
    ReaderMutexLock wmu(self, bulk_free_lock_);
    for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
      MutexLock mu(self, *size_bracket_locks_[idx]);
      Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
      DCHECK(thread_local_run == nullptr || thread_local_run == dedicated_full_run_);
    }
  }
}

void RosAlloc::AssertAllThreadLocalRunsAreRevoked() {
  if (kIsDebugBuild) {
    Thread* self = Thread::Current();
    MutexLock shutdown_mu(self, *Locks::runtime_shutdown_lock_);
    MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
    std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
    for (Thread* t : thread_list) {
      AssertThreadLocalRunsAreRevoked(t);
    }
    for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
      MutexLock brackets_mu(self, *size_bracket_locks_[idx]);
      CHECK_EQ(current_runs_[idx], dedicated_full_run_);
    }
  }
}

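// One-time computation of the static size bracket tables. With the
// constants below, the brackets work out to 16, 32, 48, ..., 512 bytes
// in 16-byte steps, then 1 KB and 2 KB, for kNumOfSizeBrackets (34)
// brackets in total.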
void RosAlloc::Initialize() {
  // bracketSizes.
  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
    if (i < kNumOfSizeBrackets - 2) {
      bracketSizes[i] = 16 * (i + 1);
    } else if (i == kNumOfSizeBrackets - 2) {
      bracketSizes[i] = 1 * KB;
    } else {
      DCHECK_EQ(i, kNumOfSizeBrackets - 1);
      bracketSizes[i] = 2 * KB;
    }
    if (kTraceRosAlloc) {
      LOG(INFO) << "bracketSizes[" << i << "]=" << bracketSizes[i];
    }
  }
  // numOfPages.
  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
    if (i < 4) {
      numOfPages[i] = 1;
    } else if (i < 8) {
      numOfPages[i] = 1;
    } else if (i < 16) {
      numOfPages[i] = 4;
    } else if (i < 32) {
      numOfPages[i] = 8;
    } else if (i == 32) {
      DCHECK_EQ(i, kNumOfSizeBrackets - 2);
      numOfPages[i] = 16;
    } else {
      DCHECK_EQ(i, kNumOfSizeBrackets - 1);
      numOfPages[i] = 32;
    }
    if (kTraceRosAlloc) {
      LOG(INFO) << "numOfPages[" << i << "]=" << numOfPages[i];
    }
  }
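  // Worked example (a sketch, assuming the usual 4 KB kPageSize): for
  // bracket 0 (16-byte slots, one page) the search below settles on 249
  // slots with a 112-byte header, i.e. 8 fixed bytes plus three 32-byte
  // bit maps (alloc, bulk free, thread-local free) padded up to a
  // multiple of the bracket size, since 249 * 16 + 112 == 4096.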
  // Compute numOfSlots, headerSizes, and the bit map offsets.
  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
    size_t bracket_size = bracketSizes[i];
    size_t run_size = kPageSize * numOfPages[i];
    size_t max_num_of_slots = run_size / bracket_size;
    // Compute the actual number of slots by taking the header and
    // alignment into account.
    size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t));
    DCHECK_EQ(fixed_header_size, static_cast<size_t>(8));
    size_t header_size = 0;
    size_t bulk_free_bit_map_offset = 0;
    size_t thread_local_free_bit_map_offset = 0;
    size_t num_of_slots = 0;
    // Search for the maximum number of slots that allows enough space
    // for the header (including the bit maps.)
    for (int s = max_num_of_slots; s >= 0; s--) {
      size_t tmp_slots_size = bracket_size * s;
      size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte;
      size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size;
      size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size;
      size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size;
      size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size;
      size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size;
      // Align up the unaligned header size. bracket_size may not be a power of two.
      size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
          tmp_unaligned_header_size :
          tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
      DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
      DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
      if (tmp_slots_size + tmp_header_size <= run_size) {
        // Found the right number of slots, that is, there was enough
        // space for the header (including the bit maps.)
        num_of_slots = s;
        header_size = tmp_header_size;
        bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off;
        thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off;
        break;
      }
    }
    DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0);
    // Add the padding for the alignment remainder.
    header_size += run_size % bracket_size;
    DCHECK_EQ(header_size + num_of_slots * bracket_size, run_size);
    numOfSlots[i] = num_of_slots;
    headerSizes[i] = header_size;
    bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset;
    threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset;
    if (kTraceRosAlloc) {
      LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i]
                << ", headerSizes[" << i << "]=" << headerSizes[i]
                << ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i]
                << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];
    }
  }
  // Set up the dedicated full run so that nobody can successfully allocate from it.
  if (kIsDebugBuild) {
    dedicated_full_run_->magic_num_ = kMagicNum;
  }
  // It doesn't matter which size bracket we use since the main goal is to have the allocation
  // fail 100% of the time you attempt to allocate into the dedicated full run.
  dedicated_full_run_->size_bracket_idx_ = 0;
  dedicated_full_run_->FillAllocBitMap();
  dedicated_full_run_->SetIsThreadLocal(true);
}

void RosAlloc::BytesAllocatedCallback(void* start ATTRIBUTE_UNUSED, void* end ATTRIBUTE_UNUSED,
                                      size_t used_bytes, void* arg) {
  if (used_bytes == 0) {
    return;
  }
  size_t* bytes_allocated = reinterpret_cast<size_t*>(arg);
  *bytes_allocated += used_bytes;
}

void RosAlloc::ObjectsAllocatedCallback(void* start ATTRIBUTE_UNUSED, void* end ATTRIBUTE_UNUSED,
                                        size_t used_bytes, void* arg) {
  if (used_bytes == 0) {
    return;
  }
  size_t* objects_allocated = reinterpret_cast<size_t*>(arg);
  ++(*objects_allocated);
}

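// Heavyweight consistency check, meant to run with the mutator lock held
// exclusively (e.g. with the world stopped): it validates the page map,
// collects every run, cross-checks each thread's thread-local runs and
// the current runs, and finally verifies each collected run.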
void RosAlloc::Verify() {
  Thread* self = Thread::Current();
  CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
      << "The mutator lock isn't exclusively held at " << __PRETTY_FUNCTION__;
  MutexLock thread_list_mu(self, *Locks::thread_list_lock_);
  ReaderMutexLock wmu(self, bulk_free_lock_);
  std::vector<Run*> runs;
  {
    MutexLock lock_mu(self, lock_);
    size_t pm_end = page_map_size_;
    size_t i = 0;
    size_t valgrind_modifier = running_on_valgrind_ ?
        2 * ::art::gc::space::kDefaultValgrindRedZoneBytes :  // Redzones before and after.
        0;
    while (i < pm_end) {
      uint8_t pm = page_map_[i];
      switch (pm) {
        case kPageMapReleased:
          // Fall-through.
        case kPageMapEmpty: {
          // The start of a free page run.
          FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
          DCHECK_EQ(fpr->magic_num_, kMagicNumFree);
          CHECK(free_page_runs_.find(fpr) != free_page_runs_.end())
              << "An empty page must belong to the free page run set";
          size_t fpr_size = fpr->ByteSize(this);
          CHECK(IsAligned<kPageSize>(fpr_size))
              << "A free page run size isn't page-aligned : " << fpr_size;
          size_t num_pages = fpr_size / kPageSize;
          CHECK_GT(num_pages, static_cast<uintptr_t>(0))
              << "A free page run size must be > 0 : " << fpr_size;
          for (size_t j = i + 1; j < i + num_pages; ++j) {
            CHECK(IsFreePage(j))
                << "A mismatch between the page map table for kPageMapEmpty "
                << " at page index " << j
                << " and the free page run size : page index range : "
                << i << " to " << (i + num_pages) << std::endl << DumpPageMap();
          }
          i += num_pages;
          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
                              << std::endl << DumpPageMap();
          break;
        }
        case kPageMapLargeObject: {
          // The start of a large object.
          size_t num_pages = 1;
          size_t idx = i + 1;
          while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
            num_pages++;
            idx++;
          }
          uint8_t* start = base_ + i * kPageSize;
          if (running_on_valgrind_) {
            start += ::art::gc::space::kDefaultValgrindRedZoneBytes;
          }
          mirror::Object* obj = reinterpret_cast<mirror::Object*>(start);
          size_t obj_size = obj->SizeOf();
          CHECK_GT(obj_size + valgrind_modifier, kLargeSizeThreshold)
              << "A rosalloc large object size must be > " << kLargeSizeThreshold;
          CHECK_EQ(num_pages, RoundUp(obj_size + valgrind_modifier, kPageSize) / kPageSize)
              << "A rosalloc large object size " << obj_size + valgrind_modifier
              << " does not match the page map table " << (num_pages * kPageSize)
              << std::endl << DumpPageMap();
          i += num_pages;
          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
                              << std::endl << DumpPageMap();
          break;
        }
        case kPageMapLargeObjectPart:
          LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm) << std::endl << DumpPageMap();
          break;
        case kPageMapRun: {
          // The start of a run.
          Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
          DCHECK_EQ(run->magic_num_, kMagicNum);
          size_t idx = run->size_bracket_idx_;
          CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << idx;
          size_t num_pages = numOfPages[idx];
          CHECK_GT(num_pages, static_cast<uintptr_t>(0))
              << "Run size must be > 0 : " << num_pages;
          for (size_t j = i + 1; j < i + num_pages; ++j) {
            CHECK_EQ(page_map_[j], kPageMapRunPart)
                << "A mismatch between the page map table for kPageMapRunPart "
                << " at page index " << j
                << " and the run size : page index range " << i << " to " << (i + num_pages)
                << std::endl << DumpPageMap();
          }
          // Don't verify the dedicated_full_run_ since it doesn't have any real allocations.
          runs.push_back(run);
          i += num_pages;
          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
                              << std::endl << DumpPageMap();
          break;
        }
        case kPageMapRunPart:
          // Fall-through.
        default:
          LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm) << std::endl << DumpPageMap();
          break;
      }
    }
  }
  std::list<Thread*> threads = Runtime::Current()->GetThreadList()->GetList();
  for (Thread* thread : threads) {
    for (size_t i = 0; i < kNumThreadLocalSizeBrackets; ++i) {
      MutexLock brackets_mu(self, *size_bracket_locks_[i]);
      Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
      CHECK(thread_local_run != nullptr);
      CHECK(thread_local_run->IsThreadLocal());
      CHECK(thread_local_run == dedicated_full_run_ ||
            thread_local_run->size_bracket_idx_ == i);
    }
  }
  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
    MutexLock brackets_mu(self, *size_bracket_locks_[i]);
    Run* current_run = current_runs_[i];
    CHECK(current_run != nullptr);
    if (current_run != dedicated_full_run_) {
      // The dedicated full run is currently marked as thread local.
      CHECK(!current_run->IsThreadLocal());
      CHECK_EQ(current_run->size_bracket_idx_, i);
    }
  }
  // Call Verify() here for the lock order.
  for (auto& run : runs) {
    run->Verify(self, this, running_on_valgrind_);
  }
}

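// Per-run consistency check: validates the header and bitmap invariants,
// confirms the run is filed where its state says it should be (with an
// owner thread, as a current run, or in one of the run sets), and then
// type-checks every live slot against the bracket size.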
void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc, bool running_on_valgrind) {
  DCHECK_EQ(magic_num_, kMagicNum) << "Bad magic number : " << Dump();
  const size_t idx = size_bracket_idx_;
  CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump();
  uint8_t* slot_base = reinterpret_cast<uint8_t*>(this) + headerSizes[idx];
  const size_t num_slots = numOfSlots[idx];
  const size_t num_vec = RoundUp(num_slots, 32) / 32;
  CHECK_GT(num_vec, 0U);
  size_t bracket_size = IndexToBracketSize(idx);
  CHECK_EQ(slot_base + num_slots * bracket_size,
           reinterpret_cast<uint8_t*>(this) + numOfPages[idx] * kPageSize)
      << "Mismatch in the end address of the run " << Dump();
  // Check that the bulk free bitmap is clean. It's only used during BulkFree().
  CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump();
  uint32_t last_word_mask = GetBitmapLastVectorMask(num_slots, num_vec);
  // Make sure all the bits at the end of the run are set so that we don't allocate there.
  CHECK_EQ(alloc_bit_map_[num_vec - 1] & last_word_mask, last_word_mask);
  // Ensure that the first bitmap index is valid.
  CHECK_LT(first_search_vec_idx_, num_vec);
  // Check the thread local runs, the current runs, and the run sets.
  if (IsThreadLocal()) {
    // If it's a thread local run, then it must be pointed to by an owner thread.
    bool owner_found = false;
    std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
    for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
      Thread* thread = *it;
      for (size_t i = 0; i < kNumThreadLocalSizeBrackets; i++) {
        MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
        Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
        if (thread_local_run == this) {
          CHECK(!owner_found)
              << "A thread local run has more than one owner thread " << Dump();
          CHECK_EQ(i, idx)
              << "A mismatching size bracket index in a thread local run " << Dump();
          owner_found = true;
        }
      }
    }
    CHECK(owner_found) << "A thread local run has no owner thread " << Dump();
  } else {
    // If it's not thread local, check that the thread local free bitmap is clean.
    CHECK(IsThreadLocalFreeBitmapClean())
        << "A non-thread-local run's thread local free bitmap isn't clean "
        << Dump();
    // Check if it's a current run for the size bracket.
    bool is_current_run = false;
    for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
      MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
      Run* current_run = rosalloc->current_runs_[i];
      if (idx == i) {
        if (this == current_run) {
          is_current_run = true;
        }
      } else {
        // If the size bracket index does not match, then it must not
        // be a current run.
        CHECK_NE(this, current_run)
            << "A current run points to a run with a wrong size bracket index " << Dump();
      }
    }
    // If it's neither a thread local run nor a current run, then it must be
    // in a run set.
    if (!is_current_run) {
      MutexLock mu(self, rosalloc->lock_);
      auto& non_full_runs = rosalloc->non_full_runs_[idx];
      // If it's all free, it must be a free page run rather than a run.
      CHECK(!IsAllFree()) << "A free run must be in a free page run set " << Dump();
      if (!IsFull()) {
        // If it's not full, it must be in the non-full run set.
        CHECK(non_full_runs.find(this) != non_full_runs.end())
            << "A non-full run isn't in the non-full run set " << Dump();
      } else {
        // If it's full, it must be in the full run set (debug build only.)
        if (kIsDebugBuild) {
          auto& full_runs = rosalloc->full_runs_[idx];
          CHECK(full_runs.find(this) != full_runs.end())
              << " A full run isn't in the full run set " << Dump();
        }
      }
    }
  }
  // Check each slot.
  size_t slots = 0;
  size_t valgrind_modifier = running_on_valgrind ?
      2 * ::art::gc::space::kDefaultValgrindRedZoneBytes :
      0U;
  for (size_t v = 0; v < num_vec; v++, slots += 32) {
    DCHECK_GE(num_slots, slots) << "Out of bounds";
    uint32_t vec = alloc_bit_map_[v];
    uint32_t thread_local_free_vec = ThreadLocalFreeBitMap()[v];
    size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
    for (size_t i = 0; i < end; ++i) {
      bool is_allocated = ((vec >> i) & 0x1) != 0;
      // In a thread local run, slots may be marked freed in the
      // thread local free bitmap.
      bool is_thread_local_freed = IsThreadLocal() && ((thread_local_free_vec >> i) & 0x1) != 0;
      if (is_allocated && !is_thread_local_freed) {
        uint8_t* slot_addr = slot_base + (slots + i) * bracket_size;
        if (running_on_valgrind) {
          slot_addr += ::art::gc::space::kDefaultValgrindRedZoneBytes;
        }
        mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
        size_t obj_size = obj->SizeOf();
        CHECK_LE(obj_size + valgrind_modifier, kLargeSizeThreshold)
            << "A run slot contains a large object " << Dump();
        CHECK_EQ(SizeToIndex(obj_size + valgrind_modifier), idx)
            << PrettyTypeOf(obj) << " "
            << "obj_size=" << obj_size << "(" << obj_size + valgrind_modifier << "), idx=" << idx
            << " A run slot contains an object with wrong size " << Dump();
      }
    }
  }
}

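// Scans the page map and returns free page runs to the OS via
// ReleasePageRange(). This is the deferred-release path used when pages
// are not released eagerly in FreePages(); the unlocked page map read
// below is racy by design and is re-validated under lock_ before
// anything is actually released.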
size_t RosAlloc::ReleasePages() {
  VLOG(heap) << "RosAlloc::ReleasePages()";
  DCHECK(!DoesReleaseAllPages());
  Thread* self = Thread::Current();
  size_t reclaimed_bytes = 0;
  size_t i = 0;
  // Check the page map size which might have changed due to grow/shrink.
  while (i < page_map_size_) {
    // Reading the page map without a lock is racy but the race is benign since it should only
    // result in occasionally not releasing pages which we could release.
    uint8_t pm = page_map_[i];
    switch (pm) {
      case kPageMapReleased:
        // Fall through.
      case kPageMapEmpty: {
        // This is currently the start of a free page run.
        // Acquire the lock to prevent other threads racing in and modifying the page map.
        MutexLock mu(self, lock_);
        // Check that it's still empty after we acquired the lock since another thread could have
        // raced in and placed an allocation here.
        if (IsFreePage(i)) {
          // Free page runs can start with a released page if we coalesced a released page free
          // page run with an empty page run.
          FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
          // There is a race condition where FreePage can coalesce fpr with the previous
          // free page run before we acquire lock_. In that case free_page_runs_.find will not find
          // a run starting at fpr. To handle this race, we skip reclaiming the page range and go
          // to the next page.
          if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
            size_t fpr_size = fpr->ByteSize(this);
            DCHECK(IsAligned<kPageSize>(fpr_size));
            uint8_t* start = reinterpret_cast<uint8_t*>(fpr);
            reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
            size_t pages = fpr_size / kPageSize;
            CHECK_GT(pages, 0U) << "Infinite loop probable";
            i += pages;
            DCHECK_LE(i, page_map_size_);
            break;
          }
        }
        FALLTHROUGH_INTENDED;
      }
      case kPageMapLargeObject:      // Fall through.
      case kPageMapLargeObjectPart:  // Fall through.
      case kPageMapRun:              // Fall through.
      case kPageMapRunPart:          // Fall through.
        ++i;
        break;  // Skip.
      default:
        LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
        break;
    }
  }
  return reclaimed_bytes;
}

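// madvise()s the page range [start, end) back to the OS and flips the
// affected entries from kPageMapEmpty to kPageMapReleased, returning the
// number of newly released bytes (pages already marked released are not
// counted twice). In debug builds the first page is kept for the free
// page run's magic number.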
size_t RosAlloc::ReleasePageRange(uint8_t* start, uint8_t* end) {
  DCHECK_ALIGNED(start, kPageSize);
  DCHECK_ALIGNED(end, kPageSize);
  DCHECK_LT(start, end);
  if (kIsDebugBuild) {
    // In the debug build, the first page of a free page run
    // contains a magic number for debugging. Exclude it.
    start += kPageSize;

    // Single pages won't be released.
    if (start == end) {
      return 0;
    }
  }
  if (!kMadviseZeroes) {
    // TODO: Do this when we resurrect the page instead.
    memset(start, 0, end - start);
  }
  CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
  size_t pm_idx = ToPageMapIndex(start);
  size_t reclaimed_bytes = 0;
  // Calculate reclaimed bytes and update the page map.
  const size_t max_idx = pm_idx + (end - start) / kPageSize;
  for (; pm_idx < max_idx; ++pm_idx) {
    DCHECK(IsFreePage(pm_idx));
    if (page_map_[pm_idx] == kPageMapEmpty) {
      // Mark the page as released and update how many bytes we released.
      reclaimed_bytes += kPageSize;
      page_map_[pm_idx] = kPageMapReleased;
    }
  }
  return reclaimed_bytes;
}

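// Appends a diagnostic to `os` when an allocation failure looks caused by
// fragmentation. As an illustration (numbers are hypothetical): if a
// 128 KB large allocation fails while the largest contiguous free page
// run is only 64 KB, the message reports both sizes so the OOM log makes
// the cause obvious.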
void RosAlloc::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) {
  Thread* self = Thread::Current();
  size_t largest_continuous_free_pages = 0;
  WriterMutexLock wmu(self, bulk_free_lock_);
  MutexLock mu(self, lock_);
  for (FreePageRun* fpr : free_page_runs_) {
    largest_continuous_free_pages = std::max(largest_continuous_free_pages,
                                             fpr->ByteSize(this));
  }
  if (failed_alloc_bytes > kLargeSizeThreshold) {
    // Large allocation.
    size_t required_bytes = RoundUp(failed_alloc_bytes, kPageSize);
    if (required_bytes > largest_continuous_free_pages) {
      os << "; failed due to fragmentation (required contiguous free "
         << required_bytes << " bytes where largest contiguous free "
         << largest_continuous_free_pages << " bytes)";
    }
  } else {
    // Non-large allocation.
    size_t required_bytes = numOfPages[SizeToIndex(failed_alloc_bytes)] * kPageSize;
    if (required_bytes > largest_continuous_free_pages) {
      os << "; failed due to fragmentation (required contiguous free "
         << required_bytes << " bytes for a new buffer where largest contiguous free "
         << largest_continuous_free_pages << " bytes)";
    }
  }
}

}  // namespace allocator
}  // namespace gc
}  // namespace art