/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "base/mutex-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object.h"
#include "mirror/object-inl.h"
#include "thread-inl.h"
#include "thread_list.h"
#include "rosalloc.h"

#include <map>
#include <list>
#include <vector>

namespace art {
namespace gc {
namespace allocator {

extern "C" void* art_heap_rosalloc_morecore(RosAlloc* rosalloc, intptr_t increment);

static constexpr bool kUsePrefetchDuringAllocRun = true;
static constexpr bool kPrefetchNewRunDataByZeroing = false;
static constexpr size_t kPrefetchStride = 64;
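// Note: the prefetch stride above assumes a 64-byte cache line, which matches
// common ARM and x86 cores; a different line size would only change how many
// __builtin_prefetch() calls AllocRun() issues, not correctness.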

size_t RosAlloc::bracketSizes[kNumOfSizeBrackets];
size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
bool RosAlloc::initialized_ = false;
size_t RosAlloc::dedicated_full_run_storage_[kPageSize / sizeof(size_t)] = { 0 };
RosAlloc::Run* RosAlloc::dedicated_full_run_ =
    reinterpret_cast<RosAlloc::Run*>(dedicated_full_run_storage_);
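// Note: Initialize() (not shown in this section) is expected to mark every
// slot of this run as allocated, so AllocSlot() on it always fails. Using it
// as the sentinel "current run" and thread-local run lets the allocation fast
// paths skip a null check.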

RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
                   PageReleaseMode page_release_mode, size_t page_release_size_threshold)
    : base_(reinterpret_cast<byte*>(base)), footprint_(capacity),
      capacity_(capacity), max_capacity_(max_capacity),
      lock_("rosalloc global lock", kRosAllocGlobalLock),
      bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock),
      page_release_mode_(page_release_mode),
      page_release_size_threshold_(page_release_size_threshold) {
  DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
  DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
  CHECK_LE(capacity, max_capacity);
  CHECK(IsAligned<kPageSize>(page_release_size_threshold_));
  if (!initialized_) {
    Initialize();
  }
  VLOG(heap) << "RosAlloc base="
             << std::hex << (intptr_t)base_ << ", end="
             << std::hex << (intptr_t)(base_ + capacity_)
             << ", capacity=" << std::dec << capacity_
             << ", max_capacity=" << std::dec << max_capacity_;
  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
    size_bracket_lock_names_[i] =
        StringPrintf("a rosalloc size bracket %d lock", static_cast<int>(i));
    size_bracket_locks_[i] = new Mutex(size_bracket_lock_names_[i].c_str(), kRosAllocBracketLock);
    current_runs_[i] = dedicated_full_run_;
  }
  DCHECK_EQ(footprint_, capacity_);
  size_t num_of_pages = footprint_ / kPageSize;
  size_t max_num_of_pages = max_capacity_ / kPageSize;
  std::string error_msg;
  page_map_mem_map_.reset(MemMap::MapAnonymous("rosalloc page map", NULL,
                                               RoundUp(max_num_of_pages, kPageSize),
                                               PROT_READ | PROT_WRITE, false, &error_msg));
  CHECK(page_map_mem_map_.get() != nullptr) << "Couldn't allocate the page map : " << error_msg;
  page_map_ = page_map_mem_map_->Begin();
  page_map_size_ = num_of_pages;
  max_page_map_size_ = max_num_of_pages;
  free_page_run_size_map_.resize(num_of_pages);
  FreePageRun* free_pages = reinterpret_cast<FreePageRun*>(base_);
  if (kIsDebugBuild) {
    free_pages->magic_num_ = kMagicNumFree;
  }
  free_pages->SetByteSize(this, capacity_);
  DCHECK_EQ(capacity_ % kPageSize, static_cast<size_t>(0));
  DCHECK(free_pages->IsFree());
  free_pages->ReleasePages(this);
  DCHECK(free_pages->IsFree());
  free_page_runs_.insert(free_pages);
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::RosAlloc() : Inserted run 0x" << std::hex
              << reinterpret_cast<intptr_t>(free_pages)
              << " into free_page_runs_";
  }
}

RosAlloc::~RosAlloc() {
  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
    delete size_bracket_locks_[i];
  }
}

void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) {
  lock_.AssertHeld(self);
  DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject);
  FreePageRun* res = NULL;
  const size_t req_byte_size = num_pages * kPageSize;
  // Find the lowest address free page run that's large enough.
  for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) {
    FreePageRun* fpr = *it;
    DCHECK(fpr->IsFree());
    size_t fpr_byte_size = fpr->ByteSize(this);
    DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
    if (req_byte_size <= fpr_byte_size) {
      // Found one.
      free_page_runs_.erase(it++);
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x"
                  << std::hex << reinterpret_cast<intptr_t>(fpr)
                  << " from free_page_runs_";
      }
      if (req_byte_size < fpr_byte_size) {
        // Split.
        FreePageRun* remainder =
            reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
        if (kIsDebugBuild) {
          remainder->magic_num_ = kMagicNumFree;
        }
        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        // Don't need to call madvise on remainder here.
        free_page_runs_.insert(remainder);
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
                    << reinterpret_cast<intptr_t>(remainder)
                    << " into free_page_runs_";
        }
        fpr->SetByteSize(this, req_byte_size);
        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
      }
      res = fpr;
      break;
    } else {
      ++it;
    }
  }

  // Failed to allocate pages. Grow the footprint, if possible.
  if (UNLIKELY(res == NULL && capacity_ > footprint_)) {
    FreePageRun* last_free_page_run = NULL;
    size_t last_free_page_run_size;
    auto it = free_page_runs_.rbegin();
    if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
      // There is a free page run at the end.
      DCHECK(last_free_page_run->IsFree());
      DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
      last_free_page_run_size = last_free_page_run->ByteSize(this);
    } else {
      // There is no free page run at the end.
      last_free_page_run_size = 0;
    }
    DCHECK_LT(last_free_page_run_size, req_byte_size);
    if (capacity_ - footprint_ + last_free_page_run_size >= req_byte_size) {
      // If we grow the heap, we can allocate it.
      size_t increment = std::min(std::max(2 * MB, req_byte_size - last_free_page_run_size),
                                  capacity_ - footprint_);
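      // Growing by at least 2 MB at a time amortizes the cost of the morecore
      // call and the page map resize over many subsequent page allocations.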
      DCHECK_EQ(increment % kPageSize, static_cast<size_t>(0));
      size_t new_footprint = footprint_ + increment;
      size_t new_num_of_pages = new_footprint / kPageSize;
      DCHECK_LT(page_map_size_, new_num_of_pages);
      DCHECK_LT(free_page_run_size_map_.size(), new_num_of_pages);
      page_map_size_ = new_num_of_pages;
      DCHECK_LE(page_map_size_, max_page_map_size_);
      free_page_run_size_map_.resize(new_num_of_pages);
      art_heap_rosalloc_morecore(this, increment);
      if (last_free_page_run_size > 0) {
        // There was a free page run at the end. Expand its size.
        DCHECK_EQ(last_free_page_run_size, last_free_page_run->ByteSize(this));
        last_free_page_run->SetByteSize(this, last_free_page_run_size + increment);
        DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        DCHECK_EQ(last_free_page_run->End(this), base_ + new_footprint);
      } else {
        // Otherwise, insert a new free page run at the end.
        FreePageRun* new_free_page_run = reinterpret_cast<FreePageRun*>(base_ + footprint_);
        if (kIsDebugBuild) {
          new_free_page_run->magic_num_ = kMagicNumFree;
        }
        new_free_page_run->SetByteSize(this, increment);
        DCHECK_EQ(new_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        free_page_runs_.insert(new_free_page_run);
        DCHECK_EQ(*free_page_runs_.rbegin(), new_free_page_run);
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::AllocPages() : Grew the heap by inserting run 0x"
                    << std::hex << reinterpret_cast<intptr_t>(new_free_page_run)
                    << " into free_page_runs_";
        }
      }
      DCHECK_LE(footprint_ + increment, capacity_);
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::AllocPages() : increased the footprint from "
                  << footprint_ << " to " << new_footprint;
      }
      footprint_ = new_footprint;

      // And retry the last free page run.
      it = free_page_runs_.rbegin();
      DCHECK(it != free_page_runs_.rend());
      FreePageRun* fpr = *it;
      if (kIsDebugBuild && last_free_page_run_size > 0) {
        DCHECK(last_free_page_run != NULL);
        DCHECK_EQ(last_free_page_run, fpr);
      }
      size_t fpr_byte_size = fpr->ByteSize(this);
      DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
      DCHECK_LE(req_byte_size, fpr_byte_size);
      free_page_runs_.erase(fpr);
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x" << std::hex
                  << reinterpret_cast<intptr_t>(fpr)
                  << " from free_page_runs_";
      }
      if (req_byte_size < fpr_byte_size) {
        // Split if there's a remainder.
        FreePageRun* remainder =
            reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
        if (kIsDebugBuild) {
          remainder->magic_num_ = kMagicNumFree;
        }
        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        free_page_runs_.insert(remainder);
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
                    << reinterpret_cast<intptr_t>(remainder)
                    << " into free_page_runs_";
        }
        fpr->SetByteSize(this, req_byte_size);
        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
      }
      res = fpr;
    }
  }
  if (LIKELY(res != NULL)) {
    // Update the page map.
    size_t page_map_idx = ToPageMapIndex(res);
    for (size_t i = 0; i < num_pages; i++) {
      DCHECK(IsFreePage(page_map_idx + i));
    }
    switch (page_map_type) {
      case kPageMapRun:
        page_map_[page_map_idx] = kPageMapRun;
        for (size_t i = 1; i < num_pages; i++) {
          page_map_[page_map_idx + i] = kPageMapRunPart;
        }
        break;
      case kPageMapLargeObject:
        page_map_[page_map_idx] = kPageMapLargeObject;
        for (size_t i = 1; i < num_pages; i++) {
          page_map_[page_map_idx + i] = kPageMapLargeObjectPart;
        }
        break;
      default:
        LOG(FATAL) << "Unreachable - page map type: " << page_map_type;
        break;
    }
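    // For example, a three-page run starting at page i leaves the page map as
    // [..., kPageMapRun, kPageMapRunPart, kPageMapRunPart, ...], so freeing
    // or walking the heap can recover the run's extent from the map alone.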
    if (kIsDebugBuild) {
      // Clear the first page since it is not madvised due to the magic number.
      memset(res, 0, kPageSize);
    }
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocPages() : 0x" << std::hex << reinterpret_cast<intptr_t>(res)
                << "-0x" << (reinterpret_cast<intptr_t>(res) + num_pages * kPageSize)
                << "(" << std::dec << (num_pages * kPageSize) << ")";
    }
    return res;
  }

  // Fail.
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::AllocPages() : NULL";
  }
  return nullptr;
}

size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
  lock_.AssertHeld(self);
  size_t pm_idx = ToPageMapIndex(ptr);
  DCHECK_LT(pm_idx, page_map_size_);
  byte pm_type = page_map_[pm_idx];
  DCHECK(pm_type == kPageMapRun || pm_type == kPageMapLargeObject);
  byte pm_part_type;
  switch (pm_type) {
    case kPageMapRun:
      pm_part_type = kPageMapRunPart;
      break;
    case kPageMapLargeObject:
      pm_part_type = kPageMapLargeObjectPart;
      break;
    default:
      LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << " : " << "pm_idx=" << pm_idx
                 << ", pm_type=" << static_cast<int>(pm_type) << ", ptr=" << std::hex
                 << reinterpret_cast<intptr_t>(ptr);
      return 0;
  }
  // Update the page map and count the number of pages.
  size_t num_pages = 1;
  page_map_[pm_idx] = kPageMapEmpty;
  size_t idx = pm_idx + 1;
  size_t end = page_map_size_;
  while (idx < end && page_map_[idx] == pm_part_type) {
    page_map_[idx] = kPageMapEmpty;
    num_pages++;
    idx++;
  }
  const size_t byte_size = num_pages * kPageSize;
  if (already_zero) {
    if (kCheckZeroMemory) {
      const uword* word_ptr = reinterpret_cast<uword*>(ptr);
      for (size_t i = 0; i < byte_size / sizeof(uword); ++i) {
        CHECK_EQ(word_ptr[i], 0U) << "words don't match at index " << i;
      }
    }
  } else if (!DoesReleaseAllPages()) {
    memset(ptr, 0, byte_size);
  }

  if (kTraceRosAlloc) {
    LOG(INFO) << __PRETTY_FUNCTION__ << " : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
              << "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size)
              << "(" << std::dec << (num_pages * kPageSize) << ")";
  }

  // Turn it into a free run.
  FreePageRun* fpr = reinterpret_cast<FreePageRun*>(ptr);
  if (kIsDebugBuild) {
    fpr->magic_num_ = kMagicNumFree;
  }
  fpr->SetByteSize(this, byte_size);
  DCHECK(IsAligned<kPageSize>(fpr->ByteSize(this)));

  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
  if (!free_page_runs_.empty()) {
    // Try to coalesce in the higher address direction.
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
                << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx
                << "] -0x" << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " ["
                << std::dec
                << (fpr->End(this) == End() ? page_map_size_ : ToPageMapIndex(fpr->End(this)))
                << "]";
    }
    auto higher_it = free_page_runs_.upper_bound(fpr);
    if (higher_it != free_page_runs_.end()) {
      for (auto it = higher_it; it != free_page_runs_.end(); ) {
        FreePageRun* h = *it;
        DCHECK_EQ(h->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a higher free page run 0x"
                    << std::hex << reinterpret_cast<uintptr_t>(h) << " [" << std::dec
                    << ToPageMapIndex(h) << "] -0x" << std::hex
                    << reinterpret_cast<uintptr_t>(h->End(this)) << " [" << std::dec
                    << (h->End(this) == End() ? page_map_size_ : ToPageMapIndex(h->End(this)))
                    << "]";
        }
        if (fpr->End(this) == h->Begin()) {
          if (kTraceRosAlloc) {
            LOG(INFO) << "Success";
          }
          // Clear magic num since this is no longer the start of a free page run.
          if (kIsDebugBuild) {
            h->magic_num_ = 0;
          }
          free_page_runs_.erase(it++);
          if (kTraceRosAlloc) {
            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
                      << reinterpret_cast<intptr_t>(h)
                      << " from free_page_runs_";
          }
          fpr->SetByteSize(this, fpr->ByteSize(this) + h->ByteSize(this));
          DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        } else {
          // Not adjacent. Stop.
          if (kTraceRosAlloc) {
            LOG(INFO) << "Fail";
          }
          break;
        }
      }
    }
    // Try to coalesce in the lower address direction.
    auto lower_it = free_page_runs_.upper_bound(fpr);
    if (lower_it != free_page_runs_.begin()) {
      --lower_it;
      for (auto it = lower_it; ; ) {
        // We want to try to coalesce with the first element but
        // there's no "<=" operator for the iterator.
        bool to_exit_loop = it == free_page_runs_.begin();

        FreePageRun* l = *it;
        DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a lower free page run 0x"
                    << std::hex << reinterpret_cast<uintptr_t>(l) << " [" << std::dec
                    << ToPageMapIndex(l) << "] -0x" << std::hex
                    << reinterpret_cast<uintptr_t>(l->End(this)) << " [" << std::dec
                    << (l->End(this) == End() ? page_map_size_ : ToPageMapIndex(l->End(this)))
                    << "]";
        }
        if (l->End(this) == fpr->Begin()) {
          if (kTraceRosAlloc) {
            LOG(INFO) << "Success";
          }
          free_page_runs_.erase(it--);
          if (kTraceRosAlloc) {
            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
                      << reinterpret_cast<intptr_t>(l)
                      << " from free_page_runs_";
          }
          l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
          DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
          // Clear magic num since this is no longer the start of a free page run.
          if (kIsDebugBuild) {
            fpr->magic_num_ = 0;
          }
          fpr = l;
        } else {
          // Not adjacent. Stop.
          if (kTraceRosAlloc) {
            LOG(INFO) << "Fail";
          }
          break;
        }
        if (to_exit_loop) {
          break;
        }
      }
    }
  }

  // Insert it.
  DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
  DCHECK(fpr->IsFree());
  fpr->ReleasePages(this);
  DCHECK(fpr->IsFree());
  free_page_runs_.insert(fpr);
  DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex
              << reinterpret_cast<intptr_t>(fpr)
              << " into free_page_runs_";
  }
  return byte_size;
}

void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) {
  DCHECK_GT(size, kLargeSizeThreshold);
  size_t num_pages = RoundUp(size, kPageSize) / kPageSize;
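  // For example, assuming 4 KB pages, a 9000-byte request rounds up to
  // 12288 bytes and is served as a three-page large object.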
  void* r;
  {
    MutexLock mu(self, lock_);
    r = AllocPages(self, num_pages, kPageMapLargeObject);
  }
  if (UNLIKELY(r == nullptr)) {
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocLargeObject() : NULL";
    }
    return nullptr;
  }
  const size_t total_bytes = num_pages * kPageSize;
  *bytes_allocated = total_bytes;
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r)
              << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
              << "(" << std::dec << (num_pages * kPageSize) << ")";
  }
  // Check if the returned memory is really all zero.
  if (kCheckZeroMemory) {
    CHECK_EQ(total_bytes % sizeof(uword), 0U);
    const uword* words = reinterpret_cast<uword*>(r);
    for (size_t i = 0; i < total_bytes / sizeof(uword); ++i) {
      CHECK_EQ(words[i], 0U);
    }
  }
  return r;
}

size_t RosAlloc::FreeInternal(Thread* self, void* ptr) {
  DCHECK_LE(base_, ptr);
  DCHECK_LT(ptr, base_ + footprint_);
  size_t pm_idx = RoundDownToPageMapIndex(ptr);
  Run* run = nullptr;
  {
    MutexLock mu(self, lock_);
    DCHECK_LT(pm_idx, page_map_size_);
    byte page_map_entry = page_map_[pm_idx];
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::FreeInternal() : " << std::hex << ptr << ", pm_idx=" << std::dec
                << pm_idx << ", page_map_entry=" << static_cast<int>(page_map_entry);
    }
    switch (page_map_[pm_idx]) {
      case kPageMapLargeObject:
        return FreePages(self, ptr, false);
      case kPageMapLargeObjectPart:
        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
        return 0;
      case kPageMapRunPart: {
        // Find the beginning of the run.
        do {
          --pm_idx;
          DCHECK_LT(pm_idx, capacity_ / kPageSize);
        } while (page_map_[pm_idx] != kPageMapRun);
        // Fall-through.
      case kPageMapRun:
        run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
        DCHECK_EQ(run->magic_num_, kMagicNum);
        break;
      case kPageMapReleased:
        // Fall-through.
      case kPageMapEmpty:
        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
        return 0;
      }
      default:
        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
        return 0;
    }
  }
  DCHECK(run != nullptr);
  return FreeFromRun(self, ptr, run);
}

size_t RosAlloc::Free(Thread* self, void* ptr) {
  ReaderMutexLock rmu(self, bulk_free_lock_);
  return FreeInternal(self, ptr);
}

RosAlloc::Run* RosAlloc::AllocRun(Thread* self, size_t idx) {
  RosAlloc::Run* new_run = nullptr;
  {
    MutexLock mu(self, lock_);
    new_run = reinterpret_cast<Run*>(AllocPages(self, numOfPages[idx], kPageMapRun));
  }
  if (LIKELY(new_run != nullptr)) {
    if (kIsDebugBuild) {
      new_run->magic_num_ = kMagicNum;
    }
    new_run->size_bracket_idx_ = idx;
    new_run->SetAllocBitMapBitsForInvalidSlots();
    DCHECK(!new_run->IsThreadLocal());
    DCHECK_EQ(new_run->first_search_vec_idx_, 0U);
    DCHECK(!new_run->to_be_bulk_freed_);
    if (kUsePrefetchDuringAllocRun && idx < kNumThreadLocalSizeBrackets) {
      // Take ownership of the cache lines if we are likely to be a thread local run.
      if (kPrefetchNewRunDataByZeroing) {
        // Zeroing the data is sometimes faster than prefetching but it increases memory usage
        // since we end up dirtying zero pages which may have been madvised.
        new_run->ZeroData();
      } else {
        const size_t num_of_slots = numOfSlots[idx];
        const size_t bracket_size = bracketSizes[idx];
        const size_t num_of_bytes = num_of_slots * bracket_size;
        byte* begin = reinterpret_cast<byte*>(new_run) + headerSizes[idx];
        for (size_t i = 0; i < num_of_bytes; i += kPrefetchStride) {
          __builtin_prefetch(begin + i);
        }
      }
    }
  }
  return new_run;
}

RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
  // Get the lowest address non-full run from the binary tree.
  std::set<Run*>* const bt = &non_full_runs_[idx];
  if (!bt->empty()) {
    // If there's one, use it as the current run.
    auto it = bt->begin();
    Run* non_full_run = *it;
    DCHECK(non_full_run != nullptr);
    DCHECK(!non_full_run->IsThreadLocal());
    bt->erase(it);
    return non_full_run;
  }
  // If there's none, allocate a new run and use it as the current run.
  return AllocRun(self, idx);
}

inline void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) {
  Run* current_run = current_runs_[idx];
  DCHECK(current_run != nullptr);
  void* slot_addr = current_run->AllocSlot();
  if (UNLIKELY(slot_addr == nullptr)) {
    // The current run got full. Try to refill it.
    DCHECK(current_run->IsFull());
    if (kIsDebugBuild && current_run != dedicated_full_run_) {
      full_runs_[idx].insert(current_run);
      if (kTraceRosAlloc) {
        LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
                  << reinterpret_cast<intptr_t>(current_run)
                  << " into full_runs_[" << std::dec << idx << "]";
      }
      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
      DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
    }
    current_run = RefillRun(self, idx);
    if (UNLIKELY(current_run == nullptr)) {
      // Failed to allocate a new run, make sure that it is the dedicated full run.
      current_runs_[idx] = dedicated_full_run_;
      return nullptr;
    }
    DCHECK(current_run != nullptr);
    DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
    DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
    current_run->SetIsThreadLocal(false);
    current_runs_[idx] = current_run;
    DCHECK(!current_run->IsFull());
    slot_addr = current_run->AllocSlot();
    // Must succeed now with a new run.
    DCHECK(slot_addr != nullptr);
  }
  return slot_addr;
}

void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) {
  DCHECK_LE(size, kLargeSizeThreshold);
  size_t bracket_size;
  size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
  DCHECK_EQ(idx, SizeToIndex(size));
  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
  DCHECK_EQ(bracket_size, bracketSizes[idx]);
  DCHECK_LE(size, bracket_size);
  DCHECK(size > 512 || bracket_size - size < 16);
  Locks::mutator_lock_->AssertExclusiveHeld(self);
  void* slot_addr = AllocFromCurrentRunUnlocked(self, idx);
  if (LIKELY(slot_addr != nullptr)) {
    DCHECK(bytes_allocated != nullptr);
    *bytes_allocated = bracket_size;
    // Caller verifies that it is all 0.
  }
  return slot_addr;
}

void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) {
  DCHECK_LE(size, kLargeSizeThreshold);
  size_t bracket_size;
  size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
  DCHECK_EQ(idx, SizeToIndex(size));
  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
  DCHECK_EQ(bracket_size, bracketSizes[idx]);
  DCHECK_LE(size, bracket_size);
  DCHECK(size > 512 || bracket_size - size < 16);

  void* slot_addr;

  if (LIKELY(idx < kNumThreadLocalSizeBrackets)) {
    // Use a thread-local run.
    Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
    // Allow invalid since this will always fail the allocation.
    if (kIsDebugBuild) {
      // Need the lock to prevent race conditions.
      MutexLock mu(self, *size_bracket_locks_[idx]);
      CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
      CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
    }
    DCHECK(thread_local_run != nullptr);
    DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_);
    slot_addr = thread_local_run->AllocSlot();
    // The allocation must fail if the run is invalid.
    DCHECK(thread_local_run != dedicated_full_run_ || slot_addr == nullptr)
        << "allocated from an invalid run";
    if (UNLIKELY(slot_addr == nullptr)) {
      // The run got full. Try to free slots.
      DCHECK(thread_local_run->IsFull());
      MutexLock mu(self, *size_bracket_locks_[idx]);
      bool is_all_free_after_merge;
      // This is safe to do for the dedicated_full_run_ since the bitmaps are empty.
      if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
        DCHECK_NE(thread_local_run, dedicated_full_run_);
        // Some slot got freed. Keep it.
        DCHECK(!thread_local_run->IsFull());
        DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
        if (is_all_free_after_merge) {
          // Check that the bitmap idx is back at 0 if it's all free.
          DCHECK_EQ(thread_local_run->first_search_vec_idx_, 0U);
        }
      } else {
        // No slots got freed. Try to refill the thread-local run.
        DCHECK(thread_local_run->IsFull());
        if (thread_local_run != dedicated_full_run_) {
          thread_local_run->SetIsThreadLocal(false);
          if (kIsDebugBuild) {
            full_runs_[idx].insert(thread_local_run);
            if (kTraceRosAlloc) {
              LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
                        << reinterpret_cast<intptr_t>(thread_local_run)
                        << " into full_runs_[" << std::dec << idx << "]";
            }
          }
          DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
          DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
        }

        thread_local_run = RefillRun(self, idx);
        if (UNLIKELY(thread_local_run == nullptr)) {
          self->SetRosAllocRun(idx, dedicated_full_run_);
          return nullptr;
        }
        DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
        DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
        thread_local_run->SetIsThreadLocal(true);
        self->SetRosAllocRun(idx, thread_local_run);
        DCHECK(!thread_local_run->IsFull());
      }

      DCHECK(thread_local_run != nullptr);
      DCHECK(!thread_local_run->IsFull());
      DCHECK(thread_local_run->IsThreadLocal());
      slot_addr = thread_local_run->AllocSlot();
      // Must succeed now with a new run.
      DCHECK(slot_addr != nullptr);
    }
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex
                << reinterpret_cast<intptr_t>(slot_addr)
                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
                << "(" << std::dec << (bracket_size) << ")";
    }
  } else {
    // Use the (shared) current run.
    MutexLock mu(self, *size_bracket_locks_[idx]);
    slot_addr = AllocFromCurrentRunUnlocked(self, idx);
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex
                << reinterpret_cast<intptr_t>(slot_addr)
                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
                << "(" << std::dec << (bracket_size) << ")";
    }
  }
  DCHECK(bytes_allocated != nullptr);
  *bytes_allocated = bracket_size;
  // Caller verifies that it is all 0.
  return slot_addr;
}

size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
  DCHECK_EQ(run->magic_num_, kMagicNum);
  DCHECK_LT(run, ptr);
  DCHECK_LT(ptr, run->End());
  const size_t idx = run->size_bracket_idx_;
  const size_t bracket_size = bracketSizes[idx];
  bool run_was_full = false;
  MutexLock mu(self, *size_bracket_locks_[idx]);
  if (kIsDebugBuild) {
    run_was_full = run->IsFull();
  }
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::FreeFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr);
  }
  if (LIKELY(run->IsThreadLocal())) {
    // It's a thread-local run. Just mark the thread-local free bit map and return.
    DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
    run->MarkThreadLocalFreeBitMap(ptr);
    if (kTraceRosAlloc) {
      LOG(INFO) << "RosAlloc::FreeFromRun() : Freed a slot in a thread local run 0x" << std::hex
                << reinterpret_cast<intptr_t>(run);
    }
    // A thread local run will be kept as a thread local even if it's become all free.
    return bracket_size;
  }
  // Free the slot in the run.
  run->FreeSlot(ptr);
  std::set<Run*>* non_full_runs = &non_full_runs_[idx];
  if (run->IsAllFree()) {
    // It has just become completely free. Free the pages of this run.
    std::set<Run*>::iterator pos = non_full_runs->find(run);
    if (pos != non_full_runs->end()) {
      non_full_runs->erase(pos);
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
                  << reinterpret_cast<intptr_t>(run) << " from non_full_runs_";
      }
    }
    if (run == current_runs_[idx]) {
      current_runs_[idx] = dedicated_full_run_;
    }
    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
    run->ZeroHeader();
    {
      MutexLock mu(self, lock_);
      FreePages(self, run, true);
    }
  } else {
    // It is not completely free. If it wasn't the current run or
    // already in the non-full run set (i.e., it was full) insert it
    // into the non-full run set.
    if (run != current_runs_[idx]) {
      std::unordered_set<Run*, hash_run, eq_run>* full_runs =
          kIsDebugBuild ? &full_runs_[idx] : NULL;
      std::set<Run*>::iterator pos = non_full_runs->find(run);
      if (pos == non_full_runs->end()) {
        DCHECK(run_was_full);
        DCHECK(full_runs->find(run) != full_runs->end());
        if (kIsDebugBuild) {
          full_runs->erase(run);
          if (kTraceRosAlloc) {
            LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
                      << reinterpret_cast<intptr_t>(run) << " from full_runs_";
          }
        }
        non_full_runs->insert(run);
        DCHECK(!run->IsFull());
        if (kTraceRosAlloc) {
          LOG(INFO) << "RosAlloc::FreeFromRun() : Inserted run 0x" << std::hex
                    << reinterpret_cast<intptr_t>(run)
                    << " into non_full_runs_[" << std::dec << idx << "]";
        }
      }
    }
  }
  return bracket_size;
}

std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) {
  std::string bit_map_str;
  for (size_t v = 0; v < num_vec; v++) {
    uint32_t vec = bit_map_base[v];
    if (v != num_vec - 1) {
      bit_map_str.append(StringPrintf("%x-", vec));
    } else {
      bit_map_str.append(StringPrintf("%x", vec));
    }
  }
  return bit_map_str.c_str();
}

std::string RosAlloc::Run::Dump() {
  size_t idx = size_bracket_idx_;
  size_t num_slots = numOfSlots[idx];
  size_t num_vec = RoundUp(num_slots, 32) / 32;
  std::ostringstream stream;
  stream << "RosAlloc Run = " << reinterpret_cast<void*>(this)
         << "{ magic_num=" << static_cast<int>(magic_num_)
         << " size_bracket_idx=" << idx
         << " is_thread_local=" << static_cast<int>(is_thread_local_)
         << " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_)
         << " first_search_vec_idx=" << first_search_vec_idx_
         << " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec)
         << " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec)
         << " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec)
         << " }" << std::endl;
  return stream.str();
}

inline void* RosAlloc::Run::AllocSlot() {
  const size_t idx = size_bracket_idx_;
  while (true) {
    if (kIsDebugBuild) {
      // Make sure that no slots leaked, the bitmap should be full for all previous vectors.
      for (size_t i = 0; i < first_search_vec_idx_; ++i) {
        CHECK_EQ(~alloc_bit_map_[i], 0U);
      }
    }
    uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_];
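    // __builtin_ffs() returns one plus the index of the least significant set
    // bit, or zero if the argument is zero. Inverting the word turns "find the
    // first free slot" into "find the first set bit": e.g. a word of
    // 0x0000000F inverts to 0xFFFFFFF0, ffs returns 5, and slot 4 is free.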
    uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr);
    if (LIKELY(ffz1 != 0)) {
      const uint32_t ffz = ffz1 - 1;
      const uint32_t slot_idx = ffz +
          first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte;
      const uint32_t mask = 1U << ffz;
      DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range";
      // Found an empty slot. Set the bit.
      DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U);
      *alloc_bitmap_ptr |= mask;
      DCHECK_NE(*alloc_bitmap_ptr & mask, 0U);
      byte* slot_addr = reinterpret_cast<byte*>(this) +
          headerSizes[idx] + slot_idx * bracketSizes[idx];
      if (kTraceRosAlloc) {
        LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex
                  << reinterpret_cast<intptr_t>(slot_addr)
                  << ", bracket_size=" << std::dec << bracketSizes[idx]
                  << ", slot_idx=" << slot_idx;
      }
      return slot_addr;
    }
    const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32;
    if (first_search_vec_idx_ + 1 >= num_words) {
      DCHECK(IsFull());
      // Already at the last word, return null.
      return nullptr;
    }
    // Increase the index to the next word and try again.
    ++first_search_vec_idx_;
  }
}

void RosAlloc::Run::FreeSlot(void* ptr) {
  DCHECK(!IsThreadLocal());
  const byte idx = size_bracket_idx_;
  const size_t bracket_size = bracketSizes[idx];
  const size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
      - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
  DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
  size_t slot_idx = offset_from_slot_base / bracket_size;
  DCHECK_LT(slot_idx, numOfSlots[idx]);
  size_t vec_idx = slot_idx / 32;
  if (kIsDebugBuild) {
    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
    DCHECK_LT(vec_idx, num_vec);
  }
  size_t vec_off = slot_idx % 32;
  uint32_t* vec = &alloc_bit_map_[vec_idx];
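  // Pull the search cursor back if the freed slot lives in an earlier word,
  // so the next AllocSlot() revisits the word that now has a free bit.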
  first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(vec_idx));
  const uint32_t mask = 1U << vec_off;
  DCHECK_NE(*vec & mask, 0U);
  *vec &= ~mask;
  DCHECK_EQ(*vec & mask, 0U);
  // Zero out the memory.
  // TODO: Investigate alternate memset since ptr is guaranteed to be aligned to 16.
  memset(ptr, 0, bracket_size);
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
  }
}

inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
  DCHECK(IsThreadLocal());
  // Free slots in the alloc bit map based on the thread local free bit map.
  const size_t idx = size_bracket_idx_;
  const size_t num_of_slots = numOfSlots[idx];
  const size_t num_vec = RoundUp(num_of_slots, 32) / 32;
  bool changed = false;
  uint32_t* vecp = &alloc_bit_map_[0];
  uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0];
  bool is_all_free_after = true;
  for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
    uint32_t tl_free_vec = *tl_free_vecp;
    uint32_t vec_before = *vecp;
    uint32_t vec_after;
    if (tl_free_vec != 0) {
      first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
      vec_after = vec_before & ~tl_free_vec;
      *vecp = vec_after;
      changed = true;
      *tl_free_vecp = 0;  // clear the thread local free bit map.
    } else {
      vec_after = vec_before;
    }
    if (vec_after != 0) {
      if (v == num_vec - 1) {
        // The run is all free only if nothing but the invalid-slot mask bits
        // are set in the last vector.
        is_all_free_after =
            is_all_free_after && GetBitmapLastVectorMask(num_of_slots, num_vec) == vec_after;
      } else {
        is_all_free_after = false;
      }
    }
    DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
  }
  *is_all_free_after_out = is_all_free_after;
  // Return true if there was at least a bit set in the thread-local
  // free bit map and at least a bit in the alloc bit map changed.
  return changed;
}

inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
  DCHECK(!IsThreadLocal());
  // Free slots in the alloc bit map based on the bulk free bit map.
  const size_t num_vec = NumberOfBitmapVectors();
  uint32_t* vecp = &alloc_bit_map_[0];
  uint32_t* free_vecp = &BulkFreeBitMap()[0];
  for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
    uint32_t free_vec = *free_vecp;
    if (free_vec != 0) {
      first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
      *vecp &= ~free_vec;
      *free_vecp = 0;  // clear the bulk free bit map.
    }
    DCHECK_EQ(*free_vecp, static_cast<uint32_t>(0));
  }
}

inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
  DCHECK(IsThreadLocal());
  // Union the thread local bit map with the bulk free bit map.
  size_t num_vec = NumberOfBitmapVectors();
  uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0];
  uint32_t* from_vecp = &BulkFreeBitMap()[0];
  for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
    uint32_t from_vec = *from_vecp;
    if (from_vec != 0) {
      *to_vecp |= from_vec;
      *from_vecp = 0;  // clear the bulk free bit map.
    }
    DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
  }
}

inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
  DCHECK(IsThreadLocal());
  MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap");
}

inline size_t RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
  return MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap");
}

inline size_t RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
                                                  const char* caller_name) {
  const byte idx = size_bracket_idx_;
  const size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
      - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
  const size_t bracket_size = bracketSizes[idx];
  memset(ptr, 0, bracket_size);
  DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
  size_t slot_idx = offset_from_slot_base / bracket_size;
  DCHECK_LT(slot_idx, numOfSlots[idx]);
  size_t vec_idx = slot_idx / 32;
  if (kIsDebugBuild) {
    size_t num_vec = NumberOfBitmapVectors();
    DCHECK_LT(vec_idx, num_vec);
  }
  size_t vec_off = slot_idx % 32;
  uint32_t* vec = &free_bit_map_base[vec_idx];
  const uint32_t mask = 1U << vec_off;
  DCHECK_EQ(*vec & mask, 0U);
  *vec |= mask;
  DCHECK_NE(*vec & mask, 0U);
  if (kTraceRosAlloc) {
    LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex
              << reinterpret_cast<intptr_t>(ptr)
              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
  }
  return bracket_size;
}

inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) {
  const size_t kBitsPerVec = 32;
  DCHECK_GE(num_vec * kBitsPerVec, num_slots);
  size_t remain = num_vec * kBitsPerVec - num_slots;
  DCHECK_NE(remain, kBitsPerVec);
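  // Example: num_slots = 40 and num_vec = 2 give remain = 24, so the returned
  // mask is ((1U << 24) - 1) << 8 == 0xFFFFFF00: the high 24 bits of the last
  // vector, which map to the 24 slots that don't exist.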
1037 return ((1U << remain) - 1) << (kBitsPerVec - remain);
1038 }
1039
IsAllFree()1040 inline bool RosAlloc::Run::IsAllFree() {
1041 const byte idx = size_bracket_idx_;
1042 const size_t num_slots = numOfSlots[idx];
1043 const size_t num_vec = NumberOfBitmapVectors();
1044 DCHECK_NE(num_vec, 0U);
1045 // Check the last vector after the loop since it uses a special case for the masked bits.
1046 for (size_t v = 0; v < num_vec - 1; v++) {
1047 uint32_t vec = alloc_bit_map_[v];
1048 if (vec != 0) {
1049 return false;
1050 }
1051 }
1052 // Make sure the last word is equal to the mask, all other bits must be 0.
1053 return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec);
1054 }
1055
IsFull()1056 inline bool RosAlloc::Run::IsFull() {
1057 const size_t num_vec = NumberOfBitmapVectors();
1058 for (size_t v = 0; v < num_vec; ++v) {
1059 if (~alloc_bit_map_[v] != 0) {
1060 return false;
1061 }
1062 }
1063 return true;
1064 }
1065
IsBulkFreeBitmapClean()1066 inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
1067 const size_t num_vec = NumberOfBitmapVectors();
1068 for (size_t v = 0; v < num_vec; v++) {
1069 uint32_t vec = BulkFreeBitMap()[v];
1070 if (vec != 0) {
1071 return false;
1072 }
1073 }
1074 return true;
1075 }
1076
IsThreadLocalFreeBitmapClean()1077 inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() {
1078 const size_t num_vec = NumberOfBitmapVectors();
1079 for (size_t v = 0; v < num_vec; v++) {
1080 uint32_t vec = ThreadLocalFreeBitMap()[v];
1081 if (vec != 0) {
1082 return false;
1083 }
1084 }
1085 return true;
1086 }
1087
SetAllocBitMapBitsForInvalidSlots()1088 inline void RosAlloc::Run::SetAllocBitMapBitsForInvalidSlots() {
1089 const size_t idx = size_bracket_idx_;
1090 const size_t num_slots = numOfSlots[idx];
1091 const size_t num_vec = RoundUp(num_slots, 32) / 32;
1092 DCHECK_NE(num_vec, 0U);
1093 // Make sure to set the bits at the end of the bitmap so that we don't allocate there since they
1094 // don't represent valid slots.
1095 alloc_bit_map_[num_vec - 1] |= GetBitmapLastVectorMask(num_slots, num_vec);
1096 }
1097
ZeroHeader()1098 inline void RosAlloc::Run::ZeroHeader() {
1099 const byte idx = size_bracket_idx_;
1100 memset(this, 0, headerSizes[idx]);
1101 }
1102
ZeroData()1103 inline void RosAlloc::Run::ZeroData() {
1104 const byte idx = size_bracket_idx_;
1105 byte* slot_begin = reinterpret_cast<byte*>(this) + headerSizes[idx];
1106 memset(slot_begin, 0, numOfSlots[idx] * bracketSizes[idx]);
1107 }
1108
FillAllocBitMap()1109 inline void RosAlloc::Run::FillAllocBitMap() {
1110 size_t num_vec = NumberOfBitmapVectors();
1111 memset(alloc_bit_map_, 0xFF, sizeof(uint32_t) * num_vec);
1112 first_search_vec_idx_ = num_vec - 1; // No free bits in any of the bitmap words.
1113 }
1114
InspectAllSlots(void (* handler)(void * start,void * end,size_t used_bytes,void * callback_arg),void * arg)1115 void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
1116 void* arg) {
1117 size_t idx = size_bracket_idx_;
1118 byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
1119 size_t num_slots = numOfSlots[idx];
1120 size_t bracket_size = IndexToBracketSize(idx);
1121 DCHECK_EQ(slot_base + num_slots * bracket_size, reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize);
1122 size_t num_vec = RoundUp(num_slots, 32) / 32;
1123 size_t slots = 0;
1124 for (size_t v = 0; v < num_vec; v++, slots += 32) {
1125 DCHECK_GE(num_slots, slots);
1126 uint32_t vec = alloc_bit_map_[v];
1127 size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
1128 for (size_t i = 0; i < end; ++i) {
1129 bool is_allocated = ((vec >> i) & 0x1) != 0;
1130 byte* slot_addr = slot_base + (slots + i) * bracket_size;
1131 if (is_allocated) {
1132 handler(slot_addr, slot_addr + bracket_size, bracket_size, arg);
1133 } else {
1134 handler(slot_addr, slot_addr + bracket_size, 0, arg);
1135 }
1136 }
1137 }
1138 }
1139
1140 // If true, read the page map entries in BulkFree() without using the
1141 // lock for better performance, assuming that the existence of an
1142 // allocated chunk/pointer being freed in BulkFree() guarantees that
1143 // the page map entry won't change. Disabled for now.
1144 static constexpr bool kReadPageMapEntryWithoutLockInBulkFree = true;
1145
BulkFree(Thread * self,void ** ptrs,size_t num_ptrs)1146 size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
1147 size_t freed_bytes = 0;
1148 if (false) {
1149 // Used only to test Free() as GC uses only BulkFree().
1150 for (size_t i = 0; i < num_ptrs; ++i) {
1151 freed_bytes += FreeInternal(self, ptrs[i]);
1152 }
1153 return freed_bytes;
1154 }
1155
1156 WriterMutexLock wmu(self, bulk_free_lock_);
1157
1158 // First mark slots to free in the bulk free bit map without locking the
1159 // size bracket locks. On host, unordered_set is faster than vector + flag.
1160 #ifdef HAVE_ANDROID_OS
1161 std::vector<Run*> runs;
1162 #else
1163 std::unordered_set<Run*, hash_run, eq_run> runs;
1164 #endif
1165 for (size_t i = 0; i < num_ptrs; i++) {
1166 void* ptr = ptrs[i];
1167 DCHECK_LE(base_, ptr);
1168 DCHECK_LT(ptr, base_ + footprint_);
1169 size_t pm_idx = RoundDownToPageMapIndex(ptr);
1170 Run* run = nullptr;
1171 if (kReadPageMapEntryWithoutLockInBulkFree) {
1172 // Read the page map entries without locking the lock.
1173 byte page_map_entry = page_map_[pm_idx];
1174 if (kTraceRosAlloc) {
1175 LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
1176 << std::dec << pm_idx
1177 << ", page_map_entry=" << static_cast<int>(page_map_entry);
1178 }
1179 if (LIKELY(page_map_entry == kPageMapRun)) {
1180 run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
1181 } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
1182 size_t pi = pm_idx;
1183 // Find the beginning of the run.
1184 do {
1185 --pi;
1186 DCHECK_LT(pi, capacity_ / kPageSize);
1187 } while (page_map_[pi] != kPageMapRun);
1188 run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
1189 } else if (page_map_entry == kPageMapLargeObject) {
1190 MutexLock mu(self, lock_);
1191 freed_bytes += FreePages(self, ptr, false);
1192 continue;
1193 } else {
1194 LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
1195 }
1196 } else {
1197 // Read the page map entries with a lock.
1198 MutexLock mu(self, lock_);
1199 DCHECK_LT(pm_idx, page_map_size_);
1200 byte page_map_entry = page_map_[pm_idx];
1201 if (kTraceRosAlloc) {
1202 LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
1203 << std::dec << pm_idx
1204 << ", page_map_entry=" << static_cast<int>(page_map_entry);
1205 }
1206 if (LIKELY(page_map_entry == kPageMapRun)) {
1207 run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
1208 } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
1209 size_t pi = pm_idx;
1210 // Find the beginning of the run.
1211 do {
1212 --pi;
1213 DCHECK_LT(pi, capacity_ / kPageSize);
1214 } while (page_map_[pi] != kPageMapRun);
1215 run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
1216 } else if (page_map_entry == kPageMapLargeObject) {
1217 freed_bytes += FreePages(self, ptr, false);
1218 continue;
1219 } else {
1220 LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
1221 }
1222 }
1223 DCHECK(run != nullptr);
1224 DCHECK_EQ(run->magic_num_, kMagicNum);
1225 // Set the bit in the bulk free bit map.
1226 freed_bytes += run->MarkBulkFreeBitMap(ptr);
1227 #ifdef HAVE_ANDROID_OS
1228 if (!run->to_be_bulk_freed_) {
1229 run->to_be_bulk_freed_ = true;
1230 runs.push_back(run);
1231 }
1232 #else
1233 runs.insert(run);
1234 #endif
1235 }
1236
1237 // Now, iterate over the affected runs and update the alloc bit map
1238 // based on the bulk free bit map (for non-thread-local runs) and
1239 // union the bulk free bit map into the thread-local free bit map
1240 // (for thread-local runs.)
1241 for (Run* run : runs) {
1242 #ifdef HAVE_ANDROID_OS
1243 DCHECK(run->to_be_bulk_freed_);
1244 run->to_be_bulk_freed_ = false;
1245 #endif
1246 size_t idx = run->size_bracket_idx_;
1247 MutexLock mu(self, *size_bracket_locks_[idx]);
1248 if (run->IsThreadLocal()) {
1249 DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
1250 DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
1251 DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
1252 run->UnionBulkFreeBitMapToThreadLocalFreeBitMap();
1253 if (kTraceRosAlloc) {
1254 LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x"
1255 << std::hex << reinterpret_cast<intptr_t>(run);
1256 }
1257 DCHECK(run->IsThreadLocal());
1258 // A thread local run will be kept as a thread local even if
1259 // it's become all free.
1260 } else {
1261 bool run_was_full = run->IsFull();
1262 run->MergeBulkFreeBitMapIntoAllocBitMap();
1263 if (kTraceRosAlloc) {
1264 LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a run 0x" << std::hex
1265 << reinterpret_cast<intptr_t>(run);
1266 }
1267 // Check if the run should be moved to non_full_runs_ or
1268 // free_page_runs_.
1269 std::set<Run*>* non_full_runs = &non_full_runs_[idx];
1270 std::unordered_set<Run*, hash_run, eq_run>* full_runs =
1271 kIsDebugBuild ? &full_runs_[idx] : NULL;
1272 if (run->IsAllFree()) {
1273 // It has just become completely free. Free the pages of the
1274 // run.
1275 bool run_was_current = run == current_runs_[idx];
1276 if (run_was_current) {
1277 DCHECK(full_runs->find(run) == full_runs->end());
1278 DCHECK(non_full_runs->find(run) == non_full_runs->end());
1279 // If it was a current run, reuse it.
1280 } else if (run_was_full) {
1281 // If it was full, remove it from the full run set (debug
1282 // only.)
1283 if (kIsDebugBuild) {
1284 std::unordered_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
1285 DCHECK(pos != full_runs->end());
1286 full_runs->erase(pos);
1287 if (kTraceRosAlloc) {
1288 LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1289 << reinterpret_cast<intptr_t>(run)
1290 << " from full_runs_";
1291 }
1292 DCHECK(full_runs->find(run) == full_runs->end());
1293 }
1294 } else {
1295 // If it was in a non full run set, remove it from the set.
1296 DCHECK(full_runs->find(run) == full_runs->end());
1297 DCHECK(non_full_runs->find(run) != non_full_runs->end());
1298 non_full_runs->erase(run);
1299 if (kTraceRosAlloc) {
1300 LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1301 << reinterpret_cast<intptr_t>(run)
1302 << " from non_full_runs_";
1303 }
1304 DCHECK(non_full_runs->find(run) == non_full_runs->end());
1305 }
1306 if (!run_was_current) {
1307 run->ZeroHeader();
1308 MutexLock mu(self, lock_);
1309 FreePages(self, run, true);
1310 }
1311 } else {
1312 // It is not completely free. If it wasn't the current run or
1313 // already in the non-full run set (i.e., it was full) insert
1314 // it into the non-full run set.
1315 if (run == current_runs_[idx]) {
1316 DCHECK(non_full_runs->find(run) == non_full_runs->end());
1317 DCHECK(full_runs->find(run) == full_runs->end());
1318 // If it was a current run, keep it.
1319 } else if (run_was_full) {
1320 // If it was full, remove it from the full run set (debug
1321 // only) and insert into the non-full run set.
1322 DCHECK(full_runs->find(run) != full_runs->end());
1323 DCHECK(non_full_runs->find(run) == non_full_runs->end());
1324 if (kIsDebugBuild) {
1325 full_runs->erase(run);
1326 if (kTraceRosAlloc) {
1327 LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
1328 << reinterpret_cast<intptr_t>(run)
1329 << " from full_runs_";
1330 }
1331 }
1332 non_full_runs->insert(run);
1333 if (kTraceRosAlloc) {
1334 LOG(INFO) << "RosAlloc::BulkFree() : Inserted run 0x" << std::hex
1335 << reinterpret_cast<intptr_t>(run)
1336 << " into non_full_runs_[" << std::dec << idx;
1337 }
1338 } else {
1339 // If it was not full, so leave it in the non full run set.
1340 DCHECK(full_runs->find(run) == full_runs->end());
1341 DCHECK(non_full_runs->find(run) != non_full_runs->end());
1342 }
1343 }
1344 }
1345 }
1346 return freed_bytes;
1347 }
1348
DumpPageMap()1349 std::string RosAlloc::DumpPageMap() {
1350 std::ostringstream stream;
1351 stream << "RosAlloc PageMap: " << std::endl;
1352 lock_.AssertHeld(Thread::Current());
1353 size_t end = page_map_size_;
1354 FreePageRun* curr_fpr = NULL;
1355 size_t curr_fpr_size = 0;
1356 size_t remaining_curr_fpr_size = 0;
1357 size_t num_running_empty_pages = 0;
1358 for (size_t i = 0; i < end; ++i) {
1359 byte pm = page_map_[i];
1360 switch (pm) {
1361 case kPageMapReleased:
1362 // Fall-through.
1363 case kPageMapEmpty: {
1364 FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
1365 if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
1366 // Encountered a fresh free page run.
1367 DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1368 DCHECK(fpr->IsFree());
1369 DCHECK(curr_fpr == NULL);
1370 DCHECK_EQ(curr_fpr_size, static_cast<size_t>(0));
1371 curr_fpr = fpr;
1372 curr_fpr_size = fpr->ByteSize(this);
1373 DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
1374 remaining_curr_fpr_size = curr_fpr_size - kPageSize;
1375 stream << "[" << i << "]=" << (pm == kPageMapReleased ? "Released" : "Empty")
1376 << " (FPR start) fpr_size=" << curr_fpr_size
1377 << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
1378 if (remaining_curr_fpr_size == 0) {
1379 // Reset at the end of the current free page run.
1380 curr_fpr = NULL;
1381 curr_fpr_size = 0;
1382 }
1383 stream << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr) << std::endl;
1384 DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
1385 } else {
1386 // Still part of the current free page run.
1387 DCHECK_NE(num_running_empty_pages, static_cast<size_t>(0));
1388 DCHECK(curr_fpr != NULL && curr_fpr_size > 0 && remaining_curr_fpr_size > 0);
1389 DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
1390 DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
1391 remaining_curr_fpr_size -= kPageSize;
1392 stream << "[" << i << "]=Empty (FPR part)"
1393 << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
1394 if (remaining_curr_fpr_size == 0) {
1395 // Reset at the end of the current free page run.
1396 curr_fpr = NULL;
1397 curr_fpr_size = 0;
1398 }
1399 }
1400 num_running_empty_pages++;
1401 break;
1402 }
1403 case kPageMapLargeObject: {
1404 DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1405 num_running_empty_pages = 0;
1406 stream << "[" << i << "]=Large (start)" << std::endl;
1407 break;
1408 }
1409 case kPageMapLargeObjectPart:
1410 DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1411 num_running_empty_pages = 0;
1412 stream << "[" << i << "]=Large (part)" << std::endl;
1413 break;
1414 case kPageMapRun: {
1415 DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1416 num_running_empty_pages = 0;
1417 Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
1418 size_t idx = run->size_bracket_idx_;
1419 stream << "[" << i << "]=Run (start)"
1420 << " idx=" << idx
1421 << " numOfPages=" << numOfPages[idx]
1422 << " is_thread_local=" << run->is_thread_local_
1423 << " is_all_free=" << (run->IsAllFree() ? 1 : 0)
1424 << std::endl;
1425 break;
1426 }
1427 case kPageMapRunPart:
1428 DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
1429 num_running_empty_pages = 0;
1430 stream << "[" << i << "]=Run (part)" << std::endl;
1431 break;
1432 default:
1433 stream << "[" << i << "]=Unrecognizable page map type: " << static_cast<int>(pm) << std::endl;
1434 break;
1435 }
1436 }
1437 return stream.str();
1438 }
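// Illustrative DumpPageMap() output for a small heap, derived from the
// formatting statements above (pointer values elided):
//   RosAlloc PageMap:
//   [0]=Empty (FPR start) fpr_size=8192 remaining_fpr_size=4096
//   curr_fpr=0x...
//   [1]=Empty (FPR part) remaining_fpr_size=0
//   [2]=Run (start) idx=0 numOfPages=1 is_thread_local=0 is_all_free=0
//   [3]=Large (start)
//   [4]=Large (part)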
1439
1440 size_t RosAlloc::UsableSize(void* ptr) {
1441 DCHECK_LE(base_, ptr);
1442 DCHECK_LT(ptr, base_ + footprint_);
1443 size_t pm_idx = RoundDownToPageMapIndex(ptr);
1444 MutexLock mu(Thread::Current(), lock_);
1445 switch (page_map_[pm_idx]) {
1446 case kPageMapReleased:
1447 // Fall-through.
1448 case kPageMapEmpty:
1449 LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
1450 << std::hex << reinterpret_cast<intptr_t>(ptr);
1451 break;
1452 case kPageMapLargeObject: {
1453 size_t num_pages = 1;
1454 size_t idx = pm_idx + 1;
1455 size_t end = page_map_size_;
1456 while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
1457 num_pages++;
1458 idx++;
1459 }
1460 return num_pages * kPageSize;
1461 }
1462 case kPageMapLargeObjectPart:
1463 LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
1464 << std::hex << reinterpret_cast<intptr_t>(ptr);
1465 break;
1466 case kPageMapRun:
1467 case kPageMapRunPart: {
1468 // Find the beginning of the run.
1469 while (page_map_[pm_idx] != kPageMapRun) {
1470 pm_idx--;
1471 DCHECK_LT(pm_idx, capacity_ / kPageSize);
1472 }
1473 DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
1474 Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
1475 DCHECK_EQ(run->magic_num_, kMagicNum);
1476 size_t idx = run->size_bracket_idx_;
1477 size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
1478 - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
1479 DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
1480 return IndexToBracketSize(idx);
1481 }
1482 default: {
1483 LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(page_map_[pm_idx]);
1484 break;
1485 }
1486 }
1487 return 0;
1488 }
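// Example: UsableSize() reports the storage actually backing ptr, not the
// requested size. A 40-byte allocation lands in the 48-byte bracket
// (idx 2), so UsableSize() returns 48; a large object spanning 3 pages
// returns 3 * kPageSize.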
1489
1490 bool RosAlloc::Trim() {
1491 MutexLock mu(Thread::Current(), lock_);
1492 FreePageRun* last_free_page_run;
1493 DCHECK_EQ(footprint_ % kPageSize, static_cast<size_t>(0));
1494 auto it = free_page_runs_.rbegin();
1495 if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
1496 // Remove the last free page run, if any.
1497 DCHECK(last_free_page_run->IsFree());
1498 DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
1499 DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
1500 DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
1501 free_page_runs_.erase(last_free_page_run);
1502 size_t decrement = last_free_page_run->ByteSize(this);
1503 size_t new_footprint = footprint_ - decrement;
1504 DCHECK_EQ(new_footprint % kPageSize, static_cast<size_t>(0));
1505 size_t new_num_of_pages = new_footprint / kPageSize;
1506 DCHECK_GE(page_map_size_, new_num_of_pages);
1507 // Zero out the tail of the page map.
1508 byte* zero_begin = const_cast<byte*>(page_map_) + new_num_of_pages;
1509 byte* madvise_begin = AlignUp(zero_begin, kPageSize);
1510 DCHECK_LE(madvise_begin, page_map_mem_map_->End());
1511 size_t madvise_size = page_map_mem_map_->End() - madvise_begin;
1512 if (madvise_size > 0) {
1513 DCHECK_ALIGNED(madvise_begin, kPageSize);
1514 DCHECK_EQ(RoundUp(madvise_size, kPageSize), madvise_size);
1515 if (!kMadviseZeroes) {
1516 memset(madvise_begin, 0, madvise_size);
1517 }
1518 CHECK_EQ(madvise(madvise_begin, madvise_size, MADV_DONTNEED), 0);
1519 }
1520 if (madvise_begin > zero_begin) {
1521 memset(zero_begin, 0, madvise_begin - zero_begin);
1522 }
1523 page_map_size_ = new_num_of_pages;
1524 free_page_run_size_map_.resize(new_num_of_pages);
1525 DCHECK_EQ(free_page_run_size_map_.size(), new_num_of_pages);
1526 art_heap_rosalloc_morecore(this, -(static_cast<intptr_t>(decrement)));
1527 if (kTraceRosAlloc) {
1528 LOG(INFO) << "RosAlloc::Trim() : decreased the footprint from "
1529 << footprint_ << " to " << new_footprint;
1530 }
1531 DCHECK_LT(new_footprint, footprint_);
1532 DCHECK_LT(new_footprint, capacity_);
1533 footprint_ = new_footprint;
1534 return true;
1535 }
1536 return false;
1537 }
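// Worked example for Trim(), assuming 4 KB pages: if the heap tail is a
// 5-page free page run, footprint_ shrinks by 5 * kPageSize = 20480 bytes,
// the now-unused tail of the page map is zeroed/madvised, and the memory
// is handed back via art_heap_rosalloc_morecore(this, -20480).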
1538
1539 void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
1540 void* arg) {
1541 // Note: no need to use this to release pages as we already do so in FreePages().
1542 if (handler == NULL) {
1543 return;
1544 }
1545 MutexLock mu(Thread::Current(), lock_);
1546 size_t pm_end = page_map_size_;
1547 size_t i = 0;
1548 while (i < pm_end) {
1549 byte pm = page_map_[i];
1550 switch (pm) {
1551 case kPageMapReleased:
1552 // Fall-through.
1553 case kPageMapEmpty: {
1554 // The start of a free page run.
1555 FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
1556 DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
1557 size_t fpr_size = fpr->ByteSize(this);
1558 DCHECK(IsAligned<kPageSize>(fpr_size));
1559 void* start = fpr;
1560 if (kIsDebugBuild) {
1561 // In the debug build, the first page of a free page run
1562 // contains a magic number for debugging. Exclude it.
1563 start = reinterpret_cast<byte*>(fpr) + kPageSize;
1564 }
1565 void* end = reinterpret_cast<byte*>(fpr) + fpr_size;
1566 handler(start, end, 0, arg);
1567 size_t num_pages = fpr_size / kPageSize;
1568 if (kIsDebugBuild) {
1569 for (size_t j = i + 1; j < i + num_pages; ++j) {
1570 DCHECK(IsFreePage(j));
1571 }
1572 }
1573 i += fpr_size / kPageSize;
1574 DCHECK_LE(i, pm_end);
1575 break;
1576 }
1577 case kPageMapLargeObject: {
1578 // The start of a large object.
1579 size_t num_pages = 1;
1580 size_t idx = i + 1;
1581 while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
1582 num_pages++;
1583 idx++;
1584 }
1585 void* start = base_ + i * kPageSize;
1586 void* end = base_ + (i + num_pages) * kPageSize;
1587 size_t used_bytes = num_pages * kPageSize;
1588 handler(start, end, used_bytes, arg);
1589 if (kIsDebugBuild) {
1590 for (size_t j = i + 1; j < i + num_pages; ++j) {
1591 DCHECK_EQ(page_map_[j], kPageMapLargeObjectPart);
1592 }
1593 }
1594 i += num_pages;
1595 DCHECK_LE(i, pm_end);
1596 break;
1597 }
1598 case kPageMapLargeObjectPart:
1599 LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
1600 break;
1601 case kPageMapRun: {
1602 // The start of a run.
1603 Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
1604 DCHECK_EQ(run->magic_num_, kMagicNum);
1605 // The dedicated full run doesn't contain any real allocations, don't visit the slots in
1606 // there.
1607 run->InspectAllSlots(handler, arg);
1608 size_t num_pages = numOfPages[run->size_bracket_idx_];
1609 if (kIsDebugBuild) {
1610 for (size_t j = i + 1; j < i + num_pages; ++j) {
1611 DCHECK_EQ(page_map_[j], kPageMapRunPart);
1612 }
1613 }
1614 i += num_pages;
1615 DCHECK_LE(i, pm_end);
1616 break;
1617 }
1618 case kPageMapRunPart:
1619 LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
1620 break;
1621 default:
1622 LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
1623 break;
1624 }
1625 }
1626 }
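// Usage sketch for InspectAll(): the handler is invoked with
// (start, end, used_bytes) for every region; free page runs report
// used_bytes == 0. For example, total live bytes can be computed with the
// BytesAllocatedCallback defined below (rosalloc being some RosAlloc*):
//   size_t bytes = 0;
//   rosalloc->InspectAll(RosAlloc::BytesAllocatedCallback, &bytes);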
1627
1628 size_t RosAlloc::Footprint() {
1629 MutexLock mu(Thread::Current(), lock_);
1630 return footprint_;
1631 }
1632
1633 size_t RosAlloc::FootprintLimit() {
1634 MutexLock mu(Thread::Current(), lock_);
1635 return capacity_;
1636 }
1637
1638 void RosAlloc::SetFootprintLimit(size_t new_capacity) {
1639 MutexLock mu(Thread::Current(), lock_);
1640 DCHECK_EQ(RoundUp(new_capacity, kPageSize), new_capacity);
1641 // Only growing is supported here. But Trim() is supported.
1642 if (capacity_ < new_capacity) {
1643 CHECK_LE(new_capacity, max_capacity_);
1644 capacity_ = new_capacity;
1645 VLOG(heap) << "new capacity=" << capacity_;
1646 }
1647 }
1648
1649 void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
1650 Thread* self = Thread::Current();
1651 // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
1652 ReaderMutexLock wmu(self, bulk_free_lock_);
1653 for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
1654 MutexLock mu(self, *size_bracket_locks_[idx]);
1655 Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
1656 CHECK(thread_local_run != nullptr);
1657 // Invalid means already revoked.
1658 DCHECK(thread_local_run->IsThreadLocal());
1659 if (thread_local_run != dedicated_full_run_) {
1660 thread->SetRosAllocRun(idx, dedicated_full_run_);
1661 DCHECK_EQ(thread_local_run->magic_num_, kMagicNum);
1662 // Note the thread local run may not be full here.
1663 bool dont_care;
1664 thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
1665 thread_local_run->SetIsThreadLocal(false);
1666 thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
1667 DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
1668 DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
1669 RevokeRun(self, idx, thread_local_run);
1670 }
1671 }
1672 }
1673
1674 void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) {
1675 size_bracket_locks_[idx]->AssertHeld(self);
1676 DCHECK(run != dedicated_full_run_);
1677 if (run->IsFull()) {
1678 if (kIsDebugBuild) {
1679 full_runs_[idx].insert(run);
1680 DCHECK(full_runs_[idx].find(run) != full_runs_[idx].end());
1681 if (kTraceRosAlloc) {
1682 LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
1683 << reinterpret_cast<intptr_t>(run)
1684 << " into full_runs_[" << std::dec << idx << "]";
1685 }
1686 }
1687 } else if (run->IsAllFree()) {
1688 run->ZeroHeader();
1689 MutexLock mu(self, lock_);
1690 FreePages(self, run, true);
1691 } else {
1692 non_full_runs_[idx].insert(run);
1693 DCHECK(non_full_runs_[idx].find(run) != non_full_runs_[idx].end());
1694 if (kTraceRosAlloc) {
1695 LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
1696 << reinterpret_cast<intptr_t>(run)
1697 << " into non_full_runs_[" << std::dec << idx << "]";
1698 }
1699 }
1700 }
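// RevokeRun() disposition, summarized:
//   IsFull()    -> remembered in full_runs_[idx] (debug builds only)
//   IsAllFree() -> header zeroed, pages returned via FreePages()
//   otherwise   -> inserted into non_full_runs_[idx] for reuse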
1701
1702 void RosAlloc::RevokeThreadUnsafeCurrentRuns() {
1703 // Revoke the current runs which share the same idx as thread local runs.
1704 Thread* self = Thread::Current();
1705 for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
1706 MutexLock mu(self, *size_bracket_locks_[idx]);
1707 if (current_runs_[idx] != dedicated_full_run_) {
1708 RevokeRun(self, idx, current_runs_[idx]);
1709 current_runs_[idx] = dedicated_full_run_;
1710 }
1711 }
1712 }
1713
1714 void RosAlloc::RevokeAllThreadLocalRuns() {
1715 // This is called when mutator threads won't allocate, such as at
1716 // Zygote creation time or during a GC pause.
1717 MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
1718 MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
1719 std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
1720 for (Thread* thread : thread_list) {
1721 RevokeThreadLocalRuns(thread);
1722 }
1723 RevokeThreadUnsafeCurrentRuns();
1724 }
1725
1726 void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) {
1727 if (kIsDebugBuild) {
1728 Thread* self = Thread::Current();
1729 // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
1730 ReaderMutexLock wmu(self, bulk_free_lock_);
1731 for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
1732 MutexLock mu(self, *size_bracket_locks_[idx]);
1733 Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx));
1734 DCHECK(thread_local_run == nullptr || thread_local_run == dedicated_full_run_);
1735 }
1736 }
1737 }
1738
1739 void RosAlloc::AssertAllThreadLocalRunsAreRevoked() {
1740 if (kIsDebugBuild) {
1741 Thread* self = Thread::Current();
1742 MutexLock mu(self, *Locks::runtime_shutdown_lock_);
1743 MutexLock mu2(self, *Locks::thread_list_lock_);
1744 std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
1745 for (Thread* t : thread_list) {
1746 AssertThreadLocalRunsAreRevoked(t);
1747 }
1748 for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
1749 MutexLock mu(self, *size_bracket_locks_[idx]);
1750 CHECK_EQ(current_runs_[idx], dedicated_full_run_);
1751 }
1752 }
1753 }
1754
1755 void RosAlloc::Initialize() {
1756 // bracketSizes.
1757 for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
1758 if (i < kNumOfSizeBrackets - 2) {
1759 bracketSizes[i] = 16 * (i + 1);
1760 } else if (i == kNumOfSizeBrackets - 2) {
1761 bracketSizes[i] = 1 * KB;
1762 } else {
1763 DCHECK_EQ(i, kNumOfSizeBrackets - 1);
1764 bracketSizes[i] = 2 * KB;
1765 }
1766 if (kTraceRosAlloc) {
1767 LOG(INFO) << "bracketSizes[" << i << "]=" << bracketSizes[i];
1768 }
1769 }
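// The resulting bracket sizes are 16, 32, 48, ..., 512 bytes in 16-byte
// steps for brackets 0..31, then 1 KB and 2 KB for the last two brackets.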
1770 // numOfPages.
1771 for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
1772 if (i < 4) {
1773 numOfPages[i] = 1;
1774 } else if (i < 8) {
1775 numOfPages[i] = 1;
1776 } else if (i < 16) {
1777 numOfPages[i] = 4;
1778 } else if (i < 32) {
1779 numOfPages[i] = 8;
1780 } else if (i == 32) {
1781 DCHECK_EQ(i, kNumOfSizeBrackets - 2);
1782 numOfPages[i] = 16;
1783 } else {
1784 DCHECK_EQ(i, kNumOfSizeBrackets - 1);
1785 numOfPages[i] = 32;
1786 }
1787 if (kTraceRosAlloc) {
1788 LOG(INFO) << "numOfPages[" << i << "]=" << numOfPages[i];
1789 }
1790 }
1791 // Compute numOfSlots, headerSizes, and the bit map offsets.
1792 for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
1793 size_t bracket_size = bracketSizes[i];
1794 size_t run_size = kPageSize * numOfPages[i];
1795 size_t max_num_of_slots = run_size / bracket_size;
1796 // Compute the actual number of slots by taking the header and
1797 // alignment into account.
1798 size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t));
1799 DCHECK_EQ(fixed_header_size, static_cast<size_t>(8));
1800 size_t header_size = 0;
1801 size_t bulk_free_bit_map_offset = 0;
1802 size_t thread_local_free_bit_map_offset = 0;
1803 size_t num_of_slots = 0;
1804 // Search for the maximum number of slots that allows enough space
1805 // for the header (including the bit maps).
1806 for (int s = max_num_of_slots; s >= 0; s--) {
1807 size_t tmp_slots_size = bracket_size * s;
1808 size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte;
1809 size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size;
1810 size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size;
1811 size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size;
1812 size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size;
1813 size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size;
1814 // Align up the unaligned header size. bracket_size may not be a power of two.
1815 size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
1816 tmp_unaligned_header_size :
1817 tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
1818 DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
1819 DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
1820 if (tmp_slots_size + tmp_header_size <= run_size) {
1821 // Found the right number of slots, that is, there was enough
1822 // space for the header (including the bit maps).
1823 num_of_slots = s;
1824 header_size = tmp_header_size;
1825 bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off;
1826 thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off;
1827 break;
1828 }
1829 }
1830 DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0);
1831 // Add the padding for the alignment remainder.
1832 header_size += run_size % bracket_size;
1833 DCHECK_EQ(header_size + num_of_slots * bracket_size, run_size);
1834 numOfSlots[i] = num_of_slots;
1835 headerSizes[i] = header_size;
1836 bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset;
1837 threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset;
1838 if (kTraceRosAlloc) {
1839 LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i]
1840 << ", headerSizes[" << i << "]=" << headerSizes[i]
1841 << ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i]
1842 << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];;
1843 }
1844 }
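// Worked example of the search above for bracket 0 (16-byte slots, one
// 4 KB page): for s around 249 slots each bit map occupies
// RoundUp(s, 32) / 8 = 32 bytes, so the unaligned header is
// 8 + 3 * 32 = 104 bytes, rounded up to 112 (a multiple of the bracket
// size). The largest s with 16 * s + 112 <= 4096 is 249, giving
// numOfSlots[0] = 249, headerSizes[0] = 112, bulkFreeBitMapOffsets[0] = 40
// and threadLocalFreeBitMapOffsets[0] = 72.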
1845 // Fill the alloc bitmap so nobody can successfully allocate from it.
1846 if (kIsDebugBuild) {
1847 dedicated_full_run_->magic_num_ = kMagicNum;
1848 }
1849 // It doesn't matter which size bracket we use since the main goal is to have the allocation
1850 // fail 100% of the time you attempt to allocate into the dedicated full run.
1851 dedicated_full_run_->size_bracket_idx_ = 0;
1852 dedicated_full_run_->FillAllocBitMap();
1853 dedicated_full_run_->SetIsThreadLocal(true);
1854 }
1855
1856 void RosAlloc::BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
1857 if (used_bytes == 0) {
1858 return;
1859 }
1860 size_t* bytes_allocated = reinterpret_cast<size_t*>(arg);
1861 *bytes_allocated += used_bytes;
1862 }
1863
1864 void RosAlloc::ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
1865 if (used_bytes == 0) {
1866 return;
1867 }
1868 size_t* objects_allocated = reinterpret_cast<size_t*>(arg);
1869 ++(*objects_allocated);
1870 }
1871
1872 void RosAlloc::Verify() {
1873 Thread* self = Thread::Current();
1874 CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
1875 << "The mutator locks isn't exclusively locked at " << __PRETTY_FUNCTION__;
1876 MutexLock mu(self, *Locks::thread_list_lock_);
1877 ReaderMutexLock wmu(self, bulk_free_lock_);
1878 std::vector<Run*> runs;
1879 {
1880 MutexLock mu(self, lock_);
1881 size_t pm_end = page_map_size_;
1882 size_t i = 0;
1883 while (i < pm_end) {
1884 byte pm = page_map_[i];
1885 switch (pm) {
1886 case kPageMapReleased:
1887 // Fall-through.
1888 case kPageMapEmpty: {
1889 // The start of a free page run.
1890 FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
1891 DCHECK_EQ(fpr->magic_num_, kMagicNumFree);
1892 CHECK(free_page_runs_.find(fpr) != free_page_runs_.end())
1893 << "An empty page must belong to the free page run set";
1894 size_t fpr_size = fpr->ByteSize(this);
1895 CHECK(IsAligned<kPageSize>(fpr_size))
1896 << "A free page run size isn't page-aligned : " << fpr_size;
1897 size_t num_pages = fpr_size / kPageSize;
1898 CHECK_GT(num_pages, static_cast<uintptr_t>(0))
1899 << "A free page run size must be > 0 : " << fpr_size;
1900 for (size_t j = i + 1; j < i + num_pages; ++j) {
1901 CHECK(IsFreePage(j))
1902 << "A mismatch between the page map table for kPageMapEmpty "
1903 << " at page index " << j
1904 << " and the free page run size : page index range : "
1905 << i << " to " << (i + num_pages) << std::endl << DumpPageMap();
1906 }
1907 i += num_pages;
1908 CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
1909 << std::endl << DumpPageMap();
1910 break;
1911 }
1912 case kPageMapLargeObject: {
1913 // The start of a large object.
1914 size_t num_pages = 1;
1915 size_t idx = i + 1;
1916 while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
1917 num_pages++;
1918 idx++;
1919 }
1920 void* start = base_ + i * kPageSize;
1921 mirror::Object* obj = reinterpret_cast<mirror::Object*>(start);
1922 size_t obj_size = obj->SizeOf();
1923 CHECK_GT(obj_size, kLargeSizeThreshold)
1924 << "A rosalloc large object size must be > " << kLargeSizeThreshold;
1925 CHECK_EQ(num_pages, RoundUp(obj_size, kPageSize) / kPageSize)
1926 << "A rosalloc large object size " << obj_size
1927 << " does not match the page map table " << (num_pages * kPageSize)
1928 << std::endl << DumpPageMap();
1929 i += num_pages;
1930 CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
1931 << std::endl << DumpPageMap();
1932 break;
1933 }
1934 case kPageMapLargeObjectPart:
1935 LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm) << std::endl << DumpPageMap();
1936 break;
1937 case kPageMapRun: {
1938 // The start of a run.
1939 Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
1940 DCHECK_EQ(run->magic_num_, kMagicNum);
1941 size_t idx = run->size_bracket_idx_;
1942 CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << idx;
1943 size_t num_pages = numOfPages[idx];
1944 CHECK_GT(num_pages, static_cast<uintptr_t>(0))
1945 << "Run size must be > 0 : " << num_pages;
1946 for (size_t j = i + 1; j < i + num_pages; ++j) {
1947 CHECK_EQ(page_map_[j], kPageMapRunPart)
1948 << "A mismatch between the page map table for kPageMapRunPart "
1949 << " at page index " << j
1950 << " and the run size : page index range " << i << " to " << (i + num_pages)
1951 << std::endl << DumpPageMap();
1952 }
1953 // Don't verify the dedicated_full_run_ since it doesn't have any real allocations.
1954 runs.push_back(run);
1955 i += num_pages;
1956 CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
1957 << std::endl << DumpPageMap();
1958 break;
1959 }
1960 case kPageMapRunPart:
1961 // Fall-through.
1962 default:
1963 LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm) << std::endl << DumpPageMap();
1964 break;
1965 }
1966 }
1967 }
1968 std::list<Thread*> threads = Runtime::Current()->GetThreadList()->GetList();
1969 for (Thread* thread : threads) {
1970 for (size_t i = 0; i < kNumThreadLocalSizeBrackets; ++i) {
1971 MutexLock mu(self, *size_bracket_locks_[i]);
1972 Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
1973 CHECK(thread_local_run != nullptr);
1974 CHECK(thread_local_run->IsThreadLocal());
1975 CHECK(thread_local_run == dedicated_full_run_ ||
1976 thread_local_run->size_bracket_idx_ == i);
1977 }
1978 }
1979 for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
1980 MutexLock mu(self, *size_bracket_locks_[i]);
1981 Run* current_run = current_runs_[i];
1982 CHECK(current_run != nullptr);
1983 if (current_run != dedicated_full_run_) {
1984 // The dedicated full run is currently marked as thread local.
1985 CHECK(!current_run->IsThreadLocal());
1986 CHECK_EQ(current_run->size_bracket_idx_, i);
1987 }
1988 }
1989 // Call Verify() here for the lock order.
1990 for (auto& run : runs) {
1991 run->Verify(self, this);
1992 }
1993 }
1994
1995 void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) {
1996 DCHECK_EQ(magic_num_, kMagicNum) << "Bad magic number : " << Dump();
1997 const size_t idx = size_bracket_idx_;
1998 CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump();
1999 byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
2000 const size_t num_slots = numOfSlots[idx];
2001 const size_t num_vec = RoundUp(num_slots, 32) / 32;
2002 CHECK_GT(num_vec, 0U);
2003 size_t bracket_size = IndexToBracketSize(idx);
2004 CHECK_EQ(slot_base + num_slots * bracket_size,
2005 reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize)
2006 << "Mismatch in the end address of the run " << Dump();
2007 // Check that the bulk free bitmap is clean. It's only used during BulkFree().
2008 CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump();
2009 uint32_t last_word_mask = GetBitmapLastVectorMask(num_slots, num_vec);
2010 // Make sure all the bits at the end of the run are set so that we don't allocate there.
2011 CHECK_EQ(alloc_bit_map_[num_vec - 1] & last_word_mask, last_word_mask);
2012 // Ensure that the first bitmap index is valid.
2013 CHECK_LT(first_search_vec_idx_, num_vec);
2014 // Check the thread local runs, the current runs, and the run sets.
2015 if (IsThreadLocal()) {
2016 // If it's a thread local run, then it must be pointed to by an owner thread.
2017 bool owner_found = false;
2018 std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
2019 for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
2020 Thread* thread = *it;
2021 for (size_t i = 0; i < kNumThreadLocalSizeBrackets; i++) {
2022 MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
2023 Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i));
2024 if (thread_local_run == this) {
2025 CHECK(!owner_found)
2026 << "A thread local run has more than one owner thread " << Dump();
2027 CHECK_EQ(i, idx)
2028 << "A mismatching size bracket index in a thread local run " << Dump();
2029 owner_found = true;
2030 }
2031 }
2032 }
2033 CHECK(owner_found) << "A thread local run has no owner thread " << Dump();
2034 } else {
2035 // If it's not thread local, check that the thread local free bitmap is clean.
2036 CHECK(IsThreadLocalFreeBitmapClean())
2037 << "A non-thread-local run's thread local free bitmap isn't clean "
2038 << Dump();
2039 // Check if it's a current run for the size bracket.
2040 bool is_current_run = false;
2041 for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
2042 MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
2043 Run* current_run = rosalloc->current_runs_[i];
2044 if (idx == i) {
2045 if (this == current_run) {
2046 is_current_run = true;
2047 }
2048 } else {
2049 // If the size bracket index does not match, then it must not
2050 // be a current run.
2051 CHECK_NE(this, current_run)
2052 << "A current run points to a run with a wrong size bracket index " << Dump();
2053 }
2054 }
2055 // If it's neither a thread-local nor a current run, then it must be
2056 // in a run set.
2057 if (!is_current_run) {
2058 MutexLock mu(self, rosalloc->lock_);
2059 std::set<Run*>& non_full_runs = rosalloc->non_full_runs_[idx];
2060 // If it's all free, it must be a free page run rather than a run.
2061 CHECK(!IsAllFree()) << "A free run must be in a free page run set " << Dump();
2062 if (!IsFull()) {
2063 // If it's not full, it must be in the non-full run set.
2064 CHECK(non_full_runs.find(this) != non_full_runs.end())
2065 << "A non-full run isn't in the non-full run set " << Dump();
2066 } else {
2067 // If it's full, it must be in the full run set (debug build only).
2068 if (kIsDebugBuild) {
2069 std::unordered_set<Run*, hash_run, eq_run>& full_runs = rosalloc->full_runs_[idx];
2070 CHECK(full_runs.find(this) != full_runs.end())
2071 << " A full run isn't in the full run set " << Dump();
2072 }
2073 }
2074 }
2075 }
2076 // Check each slot.
2077 size_t slots = 0;
2078 for (size_t v = 0; v < num_vec; v++, slots += 32) {
2079 DCHECK_GE(num_slots, slots) << "Out of bounds";
2080 uint32_t vec = alloc_bit_map_[v];
2081 uint32_t thread_local_free_vec = ThreadLocalFreeBitMap()[v];
2082 size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
2083 for (size_t i = 0; i < end; ++i) {
2084 bool is_allocated = ((vec >> i) & 0x1) != 0;
2085 // If a thread local run, slots may be marked freed in the
2086 // thread local free bitmap.
2087 bool is_thread_local_freed = IsThreadLocal() && ((thread_local_free_vec >> i) & 0x1) != 0;
2088 if (is_allocated && !is_thread_local_freed) {
2089 byte* slot_addr = slot_base + (slots + i) * bracket_size;
2090 mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
2091 size_t obj_size = obj->SizeOf();
2092 CHECK_LE(obj_size, kLargeSizeThreshold)
2093 << "A run slot contains a large object " << Dump();
2094 CHECK_EQ(SizeToIndex(obj_size), idx)
2095 << PrettyTypeOf(obj) << " "
2096 << "obj_size=" << obj_size << ", idx=" << idx << " "
2097 << "A run slot contains an object with wrong size " << Dump();
2098 }
2099 }
2100 }
2101 }
2102
2103 size_t RosAlloc::ReleasePages() {
2104 VLOG(heap) << "RosAlloc::ReleasePages()";
2105 DCHECK(!DoesReleaseAllPages());
2106 Thread* self = Thread::Current();
2107 size_t reclaimed_bytes = 0;
2108 size_t i = 0;
2109 // Check the page map size which might have changed due to grow/shrink.
2110 while (i < page_map_size_) {
2111 // Reading the page map without a lock is racy but the race is benign since it should only
2112 // result in occasionally not releasing pages which we could release.
2113 byte pm = page_map_[i];
2114 switch (pm) {
2115 case kPageMapReleased:
2116 // Fall through.
2117 case kPageMapEmpty: {
2118 // This is currently the start of a free page run.
2119 // Acquire the lock to prevent other threads racing in and modifying the page map.
2120 MutexLock mu(self, lock_);
2121 // Check that it's still empty after we acquired the lock since another thread could have
2122 // raced in and placed an allocation here.
2123 if (IsFreePage(i)) {
2124 // Free page runs can start with a released page if we coalesced a released page free
2125 // page run with an empty page run.
2126 FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
2127 // There is a race condition where FreePage can coalesce fpr with the previous
2128 // free page run before we acquire lock_. In that case free_page_runs_.find will not find
2129 // a run starting at fpr. To handle this race, we skip reclaiming the page range and go
2130 // to the next page.
2131 if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
2132 size_t fpr_size = fpr->ByteSize(this);
2133 DCHECK(IsAligned<kPageSize>(fpr_size));
2134 byte* start = reinterpret_cast<byte*>(fpr);
2135 reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
2136 size_t pages = fpr_size / kPageSize;
2137 CHECK_GT(pages, 0U) << "Infinite loop probable";
2138 i += pages;
2139 DCHECK_LE(i, page_map_size_);
2140 break;
2141 }
2142 }
2143 // Fall through.
2144 }
2145 case kPageMapLargeObject: // Fall through.
2146 case kPageMapLargeObjectPart: // Fall through.
2147 case kPageMapRun: // Fall through.
2148 case kPageMapRunPart: // Fall through.
2149 ++i;
2150 break; // Skip.
2151 default:
2152 LOG(FATAL) << "Unreachable - page map type: " << static_cast<int>(pm);
2153 break;
2154 }
2155 }
2156 return reclaimed_bytes;
2157 }
2158
2159 size_t RosAlloc::ReleasePageRange(byte* start, byte* end) {
2160 DCHECK_ALIGNED(start, kPageSize);
2161 DCHECK_ALIGNED(end, kPageSize);
2162 DCHECK_LT(start, end);
2163 if (kIsDebugBuild) {
2164 // In the debug build, the first page of a free page run
2165 // contains a magic number for debugging. Exclude it.
2166 start += kPageSize;
2167 }
2168 if (!kMadviseZeroes) {
2169 // TODO: Do this when we resurrect the page instead.
2170 memset(start, 0, end - start);
2171 }
2172 CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
2173 size_t pm_idx = ToPageMapIndex(start);
2174 size_t reclaimed_bytes = 0;
2175 // Calculate reclaimed bytes and update the page map.
2176 const size_t max_idx = pm_idx + (end - start) / kPageSize;
2177 for (; pm_idx < max_idx; ++pm_idx) {
2178 DCHECK(IsFreePage(pm_idx));
2179 if (page_map_[pm_idx] == kPageMapEmpty) {
2180 // Mark the page as released and update how many bytes we released.
2181 reclaimed_bytes += kPageSize;
2182 page_map_[pm_idx] = kPageMapReleased;
2183 }
2184 }
2185 return reclaimed_bytes;
2186 }
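// Example (release build; debug builds skip the leading magic-number
// page): releasing an 8-page free run of which 3 pages were already
// kPageMapReleased madvises the range but flips (and counts) only the
// remaining 5 kPageMapEmpty pages, so the function returns 5 * kPageSize.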
2187
2188 void RosAlloc::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) {
2189 Thread* self = Thread::Current();
2190 size_t largest_continuous_free_pages = 0;
2191 WriterMutexLock wmu(self, bulk_free_lock_);
2192 MutexLock mu(self, lock_);
2193 for (FreePageRun* fpr : free_page_runs_) {
2194 largest_continuous_free_pages = std::max(largest_continuous_free_pages,
2195 fpr->ByteSize(this));
2196 }
2197 if (failed_alloc_bytes > kLargeSizeThreshold) {
2198 // Large allocation.
2199 size_t required_bytes = RoundUp(failed_alloc_bytes, kPageSize);
2200 if (required_bytes > largest_continuous_free_pages) {
2201 os << "; failed due to fragmentation (required continguous free "
2202 << required_bytes << " bytes where largest contiguous free "
2203 << largest_continuous_free_pages << " bytes)";
2204 }
2205 } else {
2206 // Non-large allocation.
2207 size_t required_bytes = numOfPages[SizeToIndex(failed_alloc_bytes)] * kPageSize;
2208 if (required_bytes > largest_continuous_free_pages) {
2209 os << "; failed due to fragmentation (required continguous free "
2210 << required_bytes << " bytes for a new buffer where largest contiguous free "
2211 << largest_continuous_free_pages << " bytes)";
2212 }
2213 }
2214 }
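// Illustrative message appended above: a failed 12000-byte large
// allocation (rounded up to 12288) when at most 8192 contiguous bytes are
// free yields
//   "; failed due to fragmentation (required contiguous free 12288 bytes
//   where largest contiguous free 8192 bytes)"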
2215
2216 } // namespace allocator
2217 } // namespace gc
2218 } // namespace art
2219